Browse code

Incorporated functions and data from Fertig et al. (2012)

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/CoGAPS@68145 bc3139a8-67e5-0310-9ffc-ced21a209358

e.fertig authored on 01/08/2012 19:39:49
Showing 8 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 1.7.0
2
+Version: 1.7.1
3 3
 Date: 2011-09-02
4 4
 Title: Coordinated Gene Activity in Pattern Sets 
5 5
 Author: Elana J. Fertig
... ...
@@ -10,7 +10,7 @@ Description: Coordinated Gene Activity in Pattern Sets (CoGAPS) infers
10 10
   inferring activity on gene sets. 
11 11
 Maintainer: Elana J. Fertig <ejfertig@jhmi.edu>, Michael F. Ochs <mfo@jhu.edu>
12 12
 SystemRequirements: GAPS-JAGS (==1.0.2)
13
-Depends: R (>= 2.9.0), R.utils (>= 1.2.4)
13
+Depends: R (>= 2.9.0), R.utils (>= 1.2.4), gplots (>= 2.8.0)
14 14
 Imports: graphics, grDevices, methods, stats, utils
15 15
 License: GPL (== 2)
16 16
 URL: http://www.cancerbiostats.onc.jhmi.edu/CoGAPS.cfm
... ...
@@ -1,4 +1,4 @@
1 1
 CoGAPS		Algorithm for coordination of activity in gene sets with patterns from GAPS
2 2
 GAPS		MCMC matrix decomposition algorithm
3
-calcCoGASPStat	Computes the CoGAPS gene set statistic
3
+calcCoGAPSStat	Computes the CoGAPS gene set statistic 
4 4
 LoadGAPSJAGSLib	Loads in the c++ libraries that perform the GAPS matrix decomposition.  Recommended to be performed before any run of CoGAPS.
... ...
@@ -2,6 +2,10 @@ export(CoGAPS)
2 2
 export(GAPS)
3 3
 export(calcCoGAPSStat)
4 4
 export(plotGAPS)
5
+export(ReadCoGAPSResults)
6
+export(GSTargetHeatmaps)
7
+export(computeGeneGSProb)
8
+
5 9
 if(tools:::.OStype() == "windows") {
6 10
 importFrom(utils, readRegistry, winProgressBar, setWinProgressBar)
7 11
 }
... ...
@@ -12,6 +16,7 @@ importFrom(grDevices, dev.new, dev.off, pdf)
12 16
 importFrom(methods, is)
13 17
 importFrom(stats, heatmap, runif)
14 18
 importFrom(utils, read.table, write.table)
19
+importFrom(gplots, heatmap.2)
15 20
 S3method(coef, jags)
16 21
 S3method(update, jags)
17 22
 S3method(variable.names, jags)
... ...
@@ -1,4 +1,5 @@
1
-CoGAPS requires GAPS-JAGS available from http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm.  This c++ package is a redistribution of JAGS version 2.1.0 with a module implementing the GAPS matrix decomposition for microarray data.  Please see the installation instructions in the users manual.  If you have any questions, please comment Elana Fertig <ejfertig@jhmi.edu> or Michael Ochs <mfo@jhu.edu>.
1
+CoGAPS requires GAPS-JAGS available from http://www.rits.onc.jhmi.edu/dbb/custom/A6/CoGAPS.cfm or http://astor.som.jhmi.edu/~ejfertig/ejfertig/Software.html.  This c++ package is a redistribution of JAGS version 2.1.0 with a module implementing the GAPS matrix decomposition for microarray data.  Please see the installation instructions in the users manual.  If you have any questions, please contact Elana Fertig <ejfertig@jhmi.edu> or Michael Ochs <mfo@jhu.edu>.
2 2
 
3 3
 01Sep2011 - Removed dependency on rjags package on CRAN
4 4
 02Sep2011 - Included loading of jags libraries upon package loading to avoid setting the LD_LIBRARY_PATH variable
5
+01Aug2012 - Incorporated statistic to quantify inferred membership of each gene in a specified gene set with corresponding examples published in Fertig, Favorov, and Ochs (2012) IEEE Conference on Bioinformatics and Biomedicine (B310). 
5 6
new file mode 100644
... ...
@@ -0,0 +1,102 @@
1
# Read the text output of one or more CoGAPS runs stored in a single folder.
#
# Every file whose name contains 'Pmean' identifies one run; the run ID is the
# third dot-separated field of that file name (e.g. 'Pmean.0.<ID>.txt').  For
# each ID the companion files 'Psd.0.<ID>.txt', 'Amean.0.<ID>.txt' and
# 'Asd.0.<ID>.txt' must exist in the same folder.
#
# Arguments:
#   path         folder containing the result files (default: working dir).
#   output.list  if TRUE, every element of the result is a list of matrices
#                keyed by run ID; if FALSE, runs are merged into one matrix
#                per quantity (A.mean, A.sd and M are column-bound, P.mean and
#                P.sd are row-bound) with '.<ID>' appended to the bound
#                dimension's names.
#
# Value:
#   list with elements A.mean, A.sd, P.mean, P.sd and M, where M is
#   A.mean %*% P.mean for each run.
#
# Errors:
#   stops if no Pmean file is found or if any companion file is missing.
#   The caller's working directory is restored even when an error is raised.
ReadCoGAPSResults <- function(path=getwd(), output.list=TRUE) {
  origDir <- getwd()

  message(origDir)
  message('\n')

  setwd(path)
  # restore the caller's working directory on every exit path, including stop()
  on.exit(setwd(origDir), add=TRUE)

  # identify experiments in the folder
  PMeanFiles <- list.files(pattern='Pmean', full.names=FALSE)
  if (length(PMeanFiles) == 0) {
    stop(paste('Cannot read CoGAPS results: no Pmean files found in', path))
  }
  fileIDS <- vapply(strsplit(PMeanFiles, '\\.'), function(x) x[3],
                    character(1))
  names(PMeanFiles) <- fileIDS

  # build the names of the companion files for every run ID
  companionFiles <- function(prefix) {
    f <- paste(prefix, '0', fileIDS, 'txt', sep=".")
    names(f) <- fileIDS
    f
  }
  PSDFiles <- companionFiles('Psd')
  AMeanFiles <- companionFiles('Amean')
  ASDFiles <- companionFiles('Asd')

  # check if files are in the folder
  files <- c(PSDFiles, AMeanFiles, ASDFiles)
  present <- file.exists(files)
  if (!all(present)) {
    stop(paste('Cannot read CoGAPS results: missing files:',
         paste(files[!present], collapse=",")))
  }

  # read in data from the files
  readMatrix <- function(f) {
    as.matrix(read.table(f, header=TRUE, row.names=1, sep="\t"))
  }
  A.mean <- list()
  P.mean <- list()
  M <- list()
  A.sd <- list()
  P.sd <- list()
  for (ID in fileIDS) {
    A.mean[[ID]] <- readMatrix(AMeanFiles[ID])
    A.sd[[ID]] <- readMatrix(ASDFiles[ID])
    P.mean[[ID]] <- readMatrix(PMeanFiles[ID])
    P.sd[[ID]] <- readMatrix(PSDFiles[ID])
    M[[ID]] <- A.mean[[ID]] %*% P.mean[[ID]]
  }

  # per-run lists requested: nothing to merge
  if (output.list) {
    return(list(A.mean=A.mean, A.sd=A.sd, P.mean=P.mean, P.sd=P.sd, M=M))
  }

  # merge runs side by side, tagging every column name with its run ID
  bindColumns <- function(mats) {
    tagged <- lapply(fileIDS, function(ID) {
      m <- mats[[ID]]
      colnames(m) <- paste(colnames(m), rep(ID, ncol(m)), sep=".")
      m
    })
    do.call(cbind, unname(tagged))
  }

  # merge runs on top of each other, tagging every row name with its run ID
  bindRows <- function(mats) {
    tagged <- lapply(fileIDS, function(ID) {
      m <- mats[[ID]]
      rownames(m) <- paste(rownames(m), rep(ID, nrow(m)), sep=".")
      m
    })
    do.call(rbind, unname(tagged))
  }

  list(A.mean=bindColumns(A.mean), A.sd=bindColumns(A.sd),
       P.mean=bindRows(P.mean), P.sd=bindRows(P.sd), M=bindColumns(M))
}
0 103
new file mode 100644
1 104
Binary files /dev/null and b/data/TFSimData.RData differ
... ...
@@ -2,13 +2,33 @@
2 2
 %% http://bibdesk.sourceforge.net/
3 3
 
4 4
 
5
-%% Created for Elana Fertig at 2010-02-04 11:48:54 -0500 
5
+%% Created for Elana Fertig at 2012-08-01 12:57:24 -0400 
6 6
 
7 7
 
8 8
 %% Saved with string encoding Unicode (UTF-8) 
9 9
 
10 10
 
11 11
 
12
+@article{Fertig2010,
13
+	Abstract = {SUMMARY: Coordinated Gene Activity in Pattern Sets (CoGAPS) provides an integrated package for isolating gene expression driven by a biological process, enhancing inference of biological processes from transcriptomic data. CoGAPS improves on other enrichment measurement methods by combining a Markov chain Monte Carlo (MCMC) matrix factorization algorithm (GAPS) with a threshold-independent statistic inferring activity on gene sets. The software is provided as open source C++ code built on top of JAGS software with an R interface. AVAILABILITY: The R package CoGAPS and the C++ package GAPS-JAGS are provided open source under the GNU Lesser Public License (GLPL) with a users manual containing installation and operating instructions. CoGAPS is available through Bioconductor and depends on the rjags package available through CRAN to interface CoGAPS with GAPS-JAGS. URL: http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm .},
14
+	Author = {Fertig, EJ and Ding, J and Favorov, AV and Parmigiani, G and Ochs, MF},
15
+	Date-Added = {2012-08-01 12:56:23 -0400},
16
+	Date-Modified = {2012-08-01 12:57:24 -0400},
17
+	Doi = {10.1093/bioinformatics/btq503},
18
+	Journal = {Bioinformatics},
19
+	Journal-Full = {Bioinformatics (Oxford, England)},
20
+	Mesh = {Computational Biology; Gene Expression; Gene Expression Profiling; Genomics; Markov Chains; Oligonucleotide Array Sequence Analysis; Software},
21
+	Month = {Nov},
22
+	Number = {21},
23
+	Pages = {2792-3},
24
+	Pmc = {PMC3025742},
25
+	Pmid = {20810601},
26
+	Pst = {ppublish},
27
+	Title = {{CoGAPS: an R/C++ package to identify patterns and biological process activity in transcriptomic data}},
28
+	Volume = {26},
29
+	Year = {2010},
30
+	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btq503}}
31
+
12 32
 @article{Tavazoie1999,
13 33
 	Abstract = {Technologies to measure whole-genome mRNA abundances and methods to organize and display such data are emerging as valuable tools for systems-level exploration of transcriptional regulatory networks. For instance, it has been shown that mRNA data from 118 genes, measured at several time points in the developing hindbrain of mice, can be hierarchically clustered into various patterns (or 'waves') whose members tend to participate in common processes. We have previously shown that hierarchical clustering can group together genes whose cis-regulatory elements are bound by the same proteins in vivo. Hierarchical clustering has also been used to organize genes into hierarchical dendograms on the basis of their expression across multiple growth conditions. The application of Fourier analysis to synchronized yeast mRNA expression data has identified cell-cycle periodic genes, many of which have expected cis-regulatory elements. Here we apply a systematic set of statistical algorithms, based on whole-genome mRNA data, partitional clustering and motif discovery, to identify transcriptional regulatory sub-networks in yeast-without any a priori knowledge of their structure or any assumptions about their dynamics. This approach uncovered new regulons (sets of co-regulated genes) and their putative cis-regulatory elements. We used statistical characterization of known regulons and motifs to derive criteria by which we infer the biological significance of newly discovered regulons and motifs. Our approach holds promise for the rapid elucidation of genetic network architecture in sequenced organisms in which little biology is known.},
14 34
 	Address = {Department of Genetics, Harvard Medical School, Boston, Massachusetts 02115, USA.},
... ...
@@ -39,7 +59,7 @@
39 59
 	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.},
40 60
 	Rn = {0 (RNA, Messenger); 9007-49-2 (DNA)},
41 61
 	Sb = {IM},
42
-	So = {Nat Genet. 1999 Jul;22(3):281-5. },
62
+	So = {Nat Genet. 1999 Jul;22(3):281-5.},
43 63
 	Stat = {MEDLINE},
44 64
 	Title = {Systematic determination of genetic network architecture.},
45 65
 	Volume = {22},
... ...
@@ -78,7 +98,7 @@
78 98
 	Pst = {ppublish},
79 99
 	Pt = {Comparative Study; Evaluation Studies; Journal Article},
80 100
 	Sb = {IM},
81
-	So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15. },
101
+	So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15.},
82 102
 	Stat = {MEDLINE},
83 103
 	Title = {Analyzing gene expression data in terms of gene sets: methodological issues.},
84 104
 	Volume = {23},
... ...
@@ -117,7 +137,7 @@
117 137
 	Pst = {ppublish},
118 138
 	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
119 139
 	Sb = {IM},
120
-	So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14. },
140
+	So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14.},
121 141
 	Stat = {MEDLINE},
122 142
 	Title = {ClutrFree: cluster tree visualization and interpretation.},
123 143
 	Volume = {20},
... ...
@@ -125,12 +145,12 @@
125 145
 	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bth307}}
126 146
 
127 147
 @article{Carvalho2008,
128
-	Author = {Carvalho, C.M. and Chang, J. and Lucas, J. and Nevins, J.R. and Wang, Q. and West, M. },
148
+	Author = {Carvalho, C.M. and Chang, J. and Lucas, J. and Nevins, J.R. and Wang, Q. and West, M.},
129 149
 	Date-Added = {2010-02-02 12:36:36 -0500},
130 150
 	Date-Modified = {2010-02-02 12:36:36 -0500},
131
-	Journal = {J. Am. Stat. Assoc. },
151
+	Journal = {J. Am. Stat. Assoc.},
132 152
 	Pages = {1438 - 1456},
133
-	Title = {High-dimensional sparse factor modelling: Applications in gene  expression genomics},
153
+	Title = {High-dimensional sparse factor modelling: Applications in gene expression genomics},
134 154
 	Volume = {103},
135 155
 	Year = {2008}}
136 156
 
... ...
@@ -197,7 +217,7 @@
197 217
 	Pst = {ppublish},
198 218
 	Pt = {Journal Article},
199 219
 	Sb = {IM},
200
-	So = {Methods Enzymol. 2009;467:59-77. },
220
+	So = {Methods Enzymol. 2009;467:59-77.},
201 221
 	Stat = {MEDLINE},
202 222
 	Title = {Matrix factorization for recovery of biological processes from microarray data.},
203 223
 	Volume = {467},
... ...
@@ -239,7 +259,7 @@
239 259
 	Pst = {ppublish},
240 260
 	Pt = {Journal Article},
241 261
 	Sb = {IM},
242
-	So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. Epub 2005 Sep 30. },
262
+	So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. Epub 2005 Sep 30.},
243 263
 	Stat = {MEDLINE},
244 264
 	Title = {Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles.},
245 265
 	Volume = {102},
... ...
@@ -278,7 +298,7 @@
278 298
 	Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
279 299
 	Rn = {0 (Proteins)},
280 300
 	Sb = {IM},
281
-	So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81. },
301
+	So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81.},
282 302
 	Stat = {MEDLINE},
283 303
 	Title = {{Onto-Tools, the toolkit of the modern biologist: Onto-Express, Onto-Compare, Onto-Design and Onto-Translate.}},
284 304
 	Volume = {31},
... ...
@@ -330,7 +350,7 @@
330 350
 	Pubm = {Electronic},
331 351
 	Rn = {0 (Saccharomyces cerevisiae Proteins); 0 (Transcription Factors)},
332 352
 	Sb = {IM},
333
-	So = {BMC Bioinformatics. 2006 Feb 28;7:99. },
353
+	So = {BMC Bioinformatics. 2006 Feb 28;7:99.},
334 354
 	Stat = {MEDLINE},
335 355
 	Title = {Determination of strongly overlapping signaling activity from microarray data.},
336 356
 	Volume = {7},
... ...
@@ -367,7 +387,7 @@
367 387
 	Pubm = {Print},
368 388
 	Rn = {0 (Transcription Factors)},
369 389
 	Sb = {T},
370
-	So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4. },
390
+	So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4.},
371 391
 	Stat = {MEDLINE},
372 392
 	Title = {Determining transcription factor activity from microarray data using {Bayesian Markov chain Monte Carlo} sampling.},
373 393
 	Volume = {129},
... ...
@@ -402,12 +422,12 @@
402 422
 	Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
403 423
 	Pubm = {Print},
404 424
 	Sb = {IM},
405
-	So = {Bioinformatics. 2002 Apr;18(4):566-75. },
425
+	So = {Bioinformatics. 2002 Apr;18(4):566-75.},
406 426
 	Stat = {MEDLINE},
407 427
 	Title = {Application of {B}ayesian decomposition for analysing microarray data.},
408 428
 	Volume = {18},
409 429
 	Year = {2002},
410
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQJGAAAAAAJGAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADD4wxdSCsAAAUWsv4VTW9sb3Nob2tfQkRfWWVhc3QucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABRay/8Xm4ONQREYgQ0FSTwACAAkAAAkgAAAAAAAAAAAAAAAAAAAAATIAABAACAAAw+NSrQAAABEACAAAxecZIwAAAAEAKAUWsv4FFrL9Ajn6RAI5RRoAFB2EABQZOQAP6lwACqQSAAqkEQAAetwAAgCFTWFjaW50b3NoIEhEOlVzZXJzOmVqZmVydGlnOkxpYnJhcnk6TWFpbDpJTUFQLWVqZmVydGlnQG1haWwubCMxNDE5Mzkub3JnOklOQk9YOk9jaHMuaW1hcG1ib3g6QXR0YWNobWVudHM6MzA4NzoyOk1vbG9zaG9rX0JEX1llYXN0LnBkZgAADgAsABUATQBvAGwAbwBzAGgAbwBrAF8AQgBEAF8AWQBlAGEAcwB0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgB6VXNlcnMvZWpmZXJ0aWcvTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGYAEwABLwAAFQACAA///wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEHEuLi8uLi9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZtIeHyYnoicjXE5TRGljdGlvbmFyeQAIABEAGgAfACkAMgA3ADoAPwBBAFMAXABiAGkAcAB4AIMAhQCIAIoAjACPAJEAkwCdAKoArwC3ALkDAwMIAxEDHAMgAy4DNQM+A7IDtwO6AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}}
430
+	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAkYAAAAAAkYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAABRay/hVNb2xvc2hva19CRF9ZZWFzdC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFFrL/xebg41BERiBDQVJPAAIACQAACSAAAAAAAAAAAAAAAAAAAAABMgAAEAAIAADD41KtAAAAEQAIAADF5xkjAAAAAQAoBRay/gUWsv0COfpEAjlFGgAUHYQAFBk5AA/qXAAKpBIACqQRAAB63AACAIVNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6TGlicmFyeTpNYWlsOklNQVAtZWpmZXJ0aWdAbWFpbC5sIzE0MTkzOS5vcmc6SU5CT1g6T2Nocy5pbWFwbWJveDpBdHRhY2htZW50czozMDg3OjI6TW9sb3Nob2tfQkRfWWVhc3QucGRmAAAOACwAFQBNAG8AbABvAHMAaABvAGsAXwBCAEQAXwBZAGUAYQBzAHQALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAHpVc2Vycy9lamZlcnRpZy9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBxLi4vLi4vTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAC6gLsAvEC+gMFAwkDFwMeAycDmwOgA6MDsAO1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}}
411 431
 
412 432
 @article{Ochs1999,
413 433
 	Abstract = {A frequent problem in analysis is the need to find two matrices, closely related to the underlying measurement process, which when multiplied together reproduce the matrix of data points. Such problems arise throughout science, for example, in imaging where both the calibration of the sensor and the true scene may be unknown and in localized spectroscopy where multiple components may be present in varying amounts in any spectrum. Since both matrices are unknown, such a decomposition is a bilinear problem. We report here a solution to this problem for the case in which the decomposition results in matrices with elements drawn from positive additive distributions. We demonstrate the power of the methodology on chemical shift images (CSI). The new method, Bayesian spectral decomposition (BSD), reduces the CSI data to a small number of basis spectra together with their localized amplitudes. We apply this new algorithm to a 19F nonlocalized study of the catabolism of 5-fluorouracil in human liver, 31P CSI studies of a human head and calf muscle, and simulations which show its strengths and limitations. In all cases, the dataset, viewed as a matrix with rows containing the individual NMR spectra, results from the multiplication of a matrix of generally nonorthogonal basis spectra (the spectral matrix) by a matrix of the amplitudes of each basis spectrum in the the individual voxels (the amplitude matrix). The results show that BSD can simultaneously determine both the basis spectra and their distribution. In principle, BSD should solve this bilinear problem for any dataset which results from multiplication of matrices representing positive additive distributions if the data overdetermine the solutions.},
... ...
@@ -442,7 +462,7 @@
442 462
 	Pubm = {Print},
443 463
 	Rn = {51-21-8 (Fluorouracil); 56-65-5 (Adenosine Triphosphate); 7439-95-4 (Magnesium)},
444 464
 	Sb = {IM},
445
-	So = {J Magn Reson. 1999 Mar;137(1):161-76. },
465
+	So = {J Magn Reson. 1999 Mar;137(1):161-76.},
446 466
 	Stat = {MEDLINE},
447 467
 	Title = {A new method for spectral decomposition using a bilinear Bayesian approach.},
448 468
 	Volume = {137},
... ...
@@ -497,7 +517,7 @@
497 517
 	Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
498 518
 	Rn = {0 (Antineoplastic Agents); 0 (ELK1 protein, human); 0 (Piperazines); 0 (Pyrimidines); 0 (RNA, Messenger); 0 (STAT3 Transcription Factor); 0 (STAT3 protein, human); 0 (TP53 protein, human); 0 (Tumor Suppressor Protein p53); 0 (ets-Domain Protein Elk-1); 152459-95-5 (imatinib)},
499 519
 	Sb = {IM},
500
-	So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10. },
520
+	So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10.},
501 521
 	Stat = {MEDLINE},
502 522
 	Title = {Detection of treatment-induced changes in signaling pathways in gastrointestinal stromal tumors using transcriptomic data.},
503 523
 	Volume = {69},
... ...
@@ -526,4 +546,4 @@
526 546
 	Title = {Prior distributions on measure space},
527 547
 	Volume = {59},
528 548
 	Year = {1997},
529
-	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUGCQpYJHZlcnNpb25UJHRvcFkkYXJjaGl2ZXJYJG9iamVjdHMSAAGGoNEHCFRyb290gAFfEA9OU0tleWVkQXJjaGl2ZXKoCwwXGBkdJCVVJG51bGzTDQ4PEBEUViRjbGFzc1dOUy5rZXlzWk5TLm9iamVjdHOAB6ISE4ACgAOiFRaABIAGWWFsaWFzRGF0YVxyZWxhdGl2ZVBhdGjSDRobHFdOUy5kYXRhgAVPEQHyAAAAAAHyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADD4wxdSCsAAAAQbOgfU2liaXNpU2tpbGxpbmdfSlJveSMxMDI4OURGLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQKJ38SrsAMAAAAAAAAAAAADAAIAAAkgAAAAAAAAAAAAAAAAAAAAClJlZmVyZW5jZXMAEAAIAADD41KtAAAAEQAIAADEq+hDAAAAAQAQABBs6AAKpCgACqQRAAB63AACAFBNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6RG9jdW1lbnRzOlJlZmVyZW5jZXM6U2liaXNpU2tpbGxpbmdfSlJveSMxMDI4OURGLnBkZgAOAE4AJgBTAGkAYgBpAHMAaQBTAGsAaQBsAGwAaQBuAGcAXwBKAFIAbwB5AGEAbABTAHQAYQB0AFMAbwBjAEIAXwAxADkAOQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBKVXNlcnMvZWpmZXJ0aWcvRG9jdW1lbnRzL1JlZmVyZW5jZXMvU2liaXNpU2tpbGxpbmdfSlJveWFsU3RhdFNvY0JfMTk5Ny5wZGYAEwABLwAAFQACAA///wAA0h4fICFYJGNsYXNzZXNaJGNsYXNzbmFtZaMhIiNdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3RfEDouLi8uLi8uLi9SZWZlcmVuY2VzL1NpYmlzaVNraWxsaW5nX0pSb3lhbFN0YXRTb2NCXzE5OTcucGRm0h4fJieiJyNcTlNEaWN0aW9uYXJ5AAgAEQAaAB8AKQAyADcAOgA/AEEAUwBcAGIAaQBwAHgAgwCFAIgAigCMAI8AkQCTAJ0AqgCvALcAuQKvArQCvQLIAswC2gLhAuoDJwMsAy8AAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADPA==}}
549
+	Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAAABBs6B9TaWJpc2lTa2lsbGluZ19KUm95IzEwMjg5REYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAonfxKuwAwAAAAAAAAAAAAMAAgAACSAAAAAAAAAAAAAAAAAAAAAKUmVmZXJlbmNlcwAQAAgAAMPjUq0AAAARAAgAAMSr6EMAAAABABAAEGzoAAqkKAAKpBEAAHrcAAIAUE1hY2ludG9zaCBIRDpVc2VyczplamZlcnRpZzpEb2N1bWVudHM6UmVmZXJlbmNlczpTaWJpc2lTa2lsbGluZ19KUm95IzEwMjg5REYucGRmAA4ATgAmAFMAaQBiAGkAcwBpAFMAawBpAGwAbABpAG4AZwBfAEoAUgBvAHkAYQBsAFMAdABhAHQAUwBvAGMAQgBfADEAOQA5ADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEpVc2Vycy9lamZlcnRpZy9Eb2N1bWVudHMvUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxA6Li4vLi4vLi4vUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZtIcHSQloiUhXE5TRGljdGlvbmFyeRIAAYagXxAPTlNLZXllZEFyY2hpdmVyAAgAEQAWAB8AKAAyADUAOgA8AEUASwBSAF0AZQBsAG8AcQBzAHYAeAB6AHwAhgCTAJgAoAKWApgCnQKmArECtQLDAsoC0wMQAxUDGAMlAyoAAAAAAAACAQAAAAAAAAAoAAAAAAAAAAAAAAAAAAADPA==}}
530 550
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+\name{TFGeneReg}
2
+\docType{data}
3
+\alias{TFGeneReg}
4
+\alias{TFSimData}
5
+\alias{GSSimData}
6
+\title{Simulated dataset to quantify gene set membership.}
7
+\description{Simulated data and components used to generate it resulting from the differential activity of four simulated gene sets (\code{TFGeneReg$TFGeneReg}) in different samples (\code{TFGeneReg$P}).}
8
+\usage{TFGeneReg}
9
+\format{A \code{\link{list}} containing:
10
+	  \item{A}{Matrix of 100 rows and 4 columns representing the simulated amplitude matrix for activity of each of the four simulated gene sets in each pattern.}
11
+	  \item{D}{Matrix of 100 rows and 20 columns containing simulated data generated with \code{TFGeneReg$M + 0.1*pmax(TFGeneReg$M,1)*matrix(rnorm(length(TFGeneReg$M)),nrow=nrow(TFGeneReg$M))}.}
12
+	  \item{M}{Matrix of 100 rows and 20 columns containing noise-free simulated data generated with \code{TFGeneReg$A %*% TFGeneReg$P}.}
13
+	  \item{P}{Matrix of 4 rows and 20 columns representing relative activity of each of the four gene sets in \code{TFGeneReg$TFGeneReg} in each of the 20 samples.}
14
+	  \item{TFGeneReg}{List containing genes and relative activity for each of four gene sets used to formulate the amplitude matrix \code{TFGeneReg$A}.}
15
+}
16
+\references{EJ Fertig, AV Favorov, and MF Ochs (2012) Identifying context-specific transcription factor targets from prior knowledge and gene expression data. 2012 IEEE International Conference on Bioinformatics and Biomedicine.}
17
+\keyword{datasets}