%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/

%% Created for Elana Fertig at 2012-08-02 15:30:36 -0400

%% Saved with string encoding Unicode (UTF-8)

%% Conference paper. Given-name initials are written separately (E. J.)
%% so that styles which abbreviate first names keep every initial.
@inproceedings{Fertig2012,
  Address = {Philadelphia, PA, USA},
  Author = {Fertig, E. J. and Favorov, A. V. and Ochs, M. F.},
  Booktitle = {{IEEE} International Conference on Bioinformatics and Biomedicine},
  Date-Added = {2012-08-02 15:29:22 -0400},
  Date-Modified = {2012-08-02 15:30:29 -0400},
  Number = {B310},
  Title = {Identifying context-specific transcription factor targets from prior knowledge and gene expression data.},
  Year = {2012}}

%% Chapter in the edited collection named in Booktitle.
@incollection{Ochs2003,
  Address = {New York},
  Author = {Ochs, M. F.},
  Booktitle = {The Analysis of Gene Expression Data: Methods and Software},
  Date-Added = {2012-08-02 15:27:27 -0400},
  Date-Modified = {2012-08-02 15:29:07 -0400},
  Editor = {Parmigiani, G. and Garrett, E. S. and Irizarry, R. A. and Zeger, S. L.},
  Publisher = {Springer-Verlag},
  Title = {{Bayesian} Decomposition},
  Year = {2003}}

%% Journal article. Only the names CoGAPS and R/C++ are brace-protected in
%% the title; the remaining words are left to the style's casing rules.
%% The page range is written in full with a double hyphen.
@article{Fertig2010,
  Abstract = {SUMMARY: Coordinated Gene Activity in Pattern Sets (CoGAPS) provides an integrated package for isolating gene expression driven by a biological process, enhancing inference of biological processes from transcriptomic data. CoGAPS improves on other enrichment measurement methods by combining a Markov chain Monte Carlo (MCMC) matrix factorization algorithm (GAPS) with a threshold-independent statistic inferring activity on gene sets. The software is provided as open source C++ code built on top of JAGS software with an R interface. AVAILABILITY: The R package CoGAPS and the C++ package GAPS-JAGS are provided open source under the GNU Lesser Public License (GLPL) with a users manual containing installation and operating instructions. CoGAPS is available through Bioconductor and depends on the rjags package available through CRAN to interface CoGAPS with GAPS-JAGS. URL: http://www.cancerbiostats.onc.jhmi.edu/cogaps.cfm .},
  Author = {Fertig, E. J. and Ding, J. and Favorov, A. V. and Parmigiani, G. and Ochs, M. F.},
  Date-Added = {2012-08-01 12:56:23 -0400},
  Date-Modified = {2012-08-01 12:57:24 -0400},
  Doi = {10.1093/bioinformatics/btq503},
  Journal = {Bioinformatics},
  Journal-Full = {Bioinformatics (Oxford, England)},
  Mesh = {Computational Biology; Gene Expression; Gene Expression Profiling; Genomics; Markov Chains; Oligonucleotide Array Sequence Analysis; Software},
  Month = nov,
  Number = {21},
  Pages = {2792--2793},
  Pmc = {PMC3025742},
  Pmid = {20810601},
  Pst = {ppublish},
  Title = {{CoGAPS}: an {R/C++} package to identify patterns and biological process activity in transcriptomic data},
  Volume = {26},
  Year = {2010},
  Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btq503}}

@article{Tavazoie1999,
  Abstract = {Technologies to measure whole-genome mRNA abundances and methods to organize and display such data are emerging as valuable tools for systems-level exploration of transcriptional regulatory networks. For instance, it has been shown that mRNA data from 118 genes, measured at several time points in the developing hindbrain of mice, can be hierarchically clustered into various patterns (or 'waves') whose members tend to participate in common processes. We have previously shown that hierarchical clustering can group together genes whose cis-regulatory elements are bound by the same proteins in vivo. Hierarchical clustering has also been used to organize genes into hierarchical dendograms on the basis of their expression across multiple growth conditions. The application of Fourier analysis to synchronized yeast mRNA expression data has identified cell-cycle periodic genes, many of which have expected cis-regulatory elements.
Here we apply a systematic set of statistical algorithms, based on whole-genome mRNA data, partitional clustering and motif discovery, to identify transcriptional regulatory sub-networks in yeast-without any a priori knowledge of their structure or any assumptions about their dynamics. This approach uncovered new regulons (sets of co-regulated genes) and their putative cis-regulatory elements. We used statistical characterization of known regulons and motifs to derive criteria by which we infer the biological significance of newly discovered regulons and motifs. Our approach holds promise for the rapid elucidation of genetic network architecture in sequenced organisms in which little biology is known.}, Address = {Department of Genetics, Harvard Medical School, Boston, Massachusetts 02115, USA.}, Au = {Tavazoie, S and Hughes, JD and Campbell, MJ and Cho, RJ and Church, GM}, Author = {Tavazoie, S. and Hughes, J.D. and Campbell, M.J. and Cho, R.J. and Church, G.M.}, Cin = {Nat Genet. 1999 Jul;22(3):213-5. PMID: 10391202}, Crdt = {1999/07/03 10:00}, Da = {19990719}, Date-Added = {2010-02-04 11:43:20 -0500}, Date-Modified = {2010-02-04 11:48:43 -0500}, Dcom = {19990719}, Doi = {10.1038/10343}, Edat = {1999/07/03 10:00}, Issn = {1061-4036 (Print); 1061-4036 (Linking)}, Jid = {9216904}, Journal = {Nat Genet}, Jt = {Nature genetics}, Language = {eng}, Lr = {20061115}, Mh = {Animals; Cell Cycle/genetics; DNA/genetics; Gene Expression; *Genetic Techniques; Mice; Multigene Family; Open Reading Frames; RNA, Messenger/genetics/metabolism; Rhombencephalon/growth \& development/metabolism; Saccharomyces cerevisiae/cytology/genetics/metabolism}, Mhda = {2001/03/23 10:01}, Number = {3}, Own = {NLM}, Pages = {281--285}, Pl = {UNITED STATES}, Pmid = {10391217}, Pst = {ppublish}, Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.}, Rn = {0 (RNA, Messenger); 9007-49-2 (DNA)}, Sb = {IM}, So = {Nat Genet. 
1999 Jul;22(3):281-5.}, Stat = {MEDLINE}, Title = {Systematic determination of genetic network architecture.}, Volume = {22}, Year = {1999}, Bdsk-Url-1 = {http://dx.doi.org/10.1038/10343}} @article{Goeman2007, Abstract = {MOTIVATION: Many statistical tests have been proposed in recent years for analyzing gene expression data in terms of gene sets, usually from Gene Ontology. These methods are based on widely different methodological assumptions. Some approaches test differential expression of each gene set against differential expression of the rest of the genes, whereas others test each gene set on its own. Also, some methods are based on a model in which the genes are the sampling units, whereas others treat the subjects as the sampling units. This article aims to clarify the assumptions behind different approaches and to indicate a preferential methodology of gene set testing. RESULTS: We identify some crucial assumptions which are needed by the majority of methods. P-values derived from methods that use a model which takes the genes as the sampling unit are easily misinterpreted, as they are based on a statistical model that does not resemble the biological experiment actually performed. Furthermore, because these models are based on a crucial and unrealistic independence assumption between genes, the P-values derived from such methods can be wildly anti-conservative, as a simulation experiment shows. We also argue that methods that competitively test each gene set against the rest of the genes create an unnecessary rift between single gene testing and gene set testing.}, Address = {Department of Medical Statistics and Bioinformatics, Leiden University Medical Center, Leiden, The Netherlands. j.j.goeman@lumc.nl}, Au = {Goeman, JJ and Buhlmann, P}, Author = {Goeman, J.J. 
and B{\"u}hlmann, P.},
  Crdt = {2007/02/17 09:00},
  Da = {20070501},
  Date-Added = {2010-02-04 11:42:46 -0500},
  Date-Modified = {2010-02-04 11:48:51 -0500},
  Dcom = {20070522},
  Dep = {20070215},
  Doi = {10.1093/bioinformatics/btm051},
  Edat = {2007/02/17 09:00},
  Issn = {1367-4811 (Electronic); 1367-4803 (Linking)},
  Jid = {9808944},
  Journal = {Bioinformatics},
  Jt = {Bioinformatics (Oxford, England)},
  Keywords = {gene sets},
  Language = {eng},
  Lr = {20091104},
  Mh = {*Algorithms; *Artifacts; *Data Interpretation, Statistical; *Databases, Genetic; Gene Expression Profiling/*methods; Information Storage and Retrieval/*methods; Reproducibility of Results; Sensitivity and Specificity},
  Mhda = {2007/05/23 09:00},
  Number = {8},
  Own = {NLM},
  Pages = {980--987},
  Phst = {2007/02/15 {$[$}aheadofprint{$]$}},
  Pii = {btm051},
  Pl = {England},
  Pmid = {17303618},
  Pst = {ppublish},
  Pt = {Comparative Study; Evaluation Studies; Journal Article},
  Sb = {IM},
  So = {Bioinformatics. 2007 Apr 15;23(8):980-7. Epub 2007 Feb 15.},
  Stat = {MEDLINE},
  Title = {Analyzing gene expression data in terms of gene sets: methodological issues.},
  Volume = {23},
  Year = {2007},
  Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/btm051}}

%% Journal article. The software name ClutrFree is brace-protected so that
%% sentence-casing styles do not lower-case its internal capital.
@article{Bidaut2004,
  Abstract = {ClutrFree facilitates the visualization and interpretation of clusters or patterns computed from microarray data through a graphical user interface that displays patterns, membership information of the genes and annotation statistics simultaneously. ClutrFree creates a tree linking the patterns based on similarity, permitting the navigation among patterns identified by different algorithms or by the same algorithm with different parameters, and aids the inferring of conclusions from a microarray experiment. AVAILABILITY: The ClutrFree Java source code and compiled bytecode are available as a package under the GNU General Public License at http://bioinformatics.fccc.edu},
  Address = {Division of Population Science, Fox Chase Cancer Center, 333 Cottman Avenue, Philadelphia, PA 19111, USA.},
  Au = {Bidaut, G and Ochs, MF},
  Author = {Bidaut, G. and Ochs, M. F.},
  Crdt = {2004/05/18 05:00},
  Da = {20041101},
  Date-Added = {2010-02-02 12:36:53 -0500},
  Date-Modified = {2010-02-02 16:00:15 -0500},
  Dcom = {20050210},
  Dep = {20040514},
  Doi = {10.1093/bioinformatics/bth307},
  Edat = {2004/05/18 05:00},
  Gr = {CA06927/CA/NCI NIH HHS/United States},
  Issn = {1367-4803 (Print); 1367-4803 (Linking)},
  Jid = {9808944},
  Journal = {Bioinformatics},
  Jt = {Bioinformatics (Oxford, England)},
  Language = {eng},
  Lr = {20071114},
  Mh = {*Algorithms; *Cluster Analysis; Computer Graphics; Oligonucleotide Array Sequence Analysis/*methods; Sequence Alignment/*methods; Sequence Analysis, DNA/*methods; *Software; *User-Computer Interface},
  Mhda = {2005/02/11 09:00},
  Number = {16},
  Own = {NLM},
  Pages = {2869--2871},
  Phst = {2004/05/14 {$[$}aheadofprint{$]$}},
  Pii = {bth307},
  Pl = {England},
  Pmid = {15145813},
  Pst = {ppublish},
  Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.},
  Sb = {IM},
  So = {Bioinformatics. 2004 Nov 1;20(16):2869-71. Epub 2004 May 14.},
  Stat = {MEDLINE},
  Title = {{ClutrFree}: cluster tree visualization and interpretation.},
  Volume = {20},
  Year = {2004},
  Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bth307}}

%% Journal article. The journal name is stored in full and the page range
%% uses a double hyphen without surrounding spaces.
@article{Carvalho2008,
  Author = {Carvalho, C. M. and Chang, J. and Lucas, J. and Nevins, J. R. and Wang, Q. and West, M.},
  Date-Added = {2010-02-02 12:36:36 -0500},
  Date-Modified = {2010-02-02 12:36:36 -0500},
  Journal = {Journal of the American Statistical Association},
  Pages = {1438--1456},
  Title = {High-dimensional sparse factor modelling: Applications in gene expression genomics},
  Volume = {103},
  Year = {2008}}

@article{Lee1999,
  Abstract = {Is perception of the whole based on perception of its parts? There is psychological and physiological evidence for parts-based representations in the brain, and certain computational theories of object recognition rely on such representations. But little is known about how brains or computers might learn the parts of objects. Here we demonstrate an algorithm for non-negative matrix factorization that is able to learn parts of faces and semantic features of text. This is in contrast to other methods, such as principal components analysis and vector quantization, that learn holistic, not parts-based, representations. Non-negative matrix factorization is distinguished from the other methods by its use of non-negativity constraints. These constraints lead to a parts-based representation because they allow only additive, not subtractive, combinations. When non-negative matrix factorization is implemented as a neural network, parts-based representations emerge by virtue of two properties: the firing rates of neurons are never negative and synaptic strengths do not change sign.},
  Address = {Bell Laboratories, Lucent Technologies, Murray Hill, New Jersey 07974, USA.},
  Author = {Lee, D. D. and Seung, H. S.},
  Cin = {Nature. 1999 Oct 21;401(6755):759. PMID: 10548097; Nature. 1999 Oct 21;401(6755):759-60.
PMID: 10548098}, Crdt = {1999/11/05 08:00}, Da = {19991116}, Date = {1999 Oct 21}, Date-Added = {2010-02-02 12:35:49 -0500}, Date-Modified = {2010-02-02 15:57:17 -0500}, Dcom = {19991116}, Doi = {10.1038/44565}, Edat = {1999/11/05 08:00}, Issn = {0028-0836 (Print)}, Jid = {0410462}, Journal = {Nature}, Jt = {Nature}, Language = {eng}, Lr = {20061115}, Mh = {*Algorithms; Face; Humans; *Learning; Models, Neurological; Perception/physiology; Semantics}, Mhda = {2001/03/23 10:01}, Number = {6755}, Own = {NLM}, Pages = {788--791}, Pl = {ENGLAND}, Pmid = {10548103}, Pst = {ppublish}, Pt = {Journal Article; Research Support, Non-U.S. Gov't}, Sb = {IM}, Status = {MEDLINE}, Title = {Learning the parts of objects by non-negative matrix factorization.}, Volume = {401}, Year = {1999}, Bdsk-Url-1 = {http://dx.doi.org/10.1038/44565}} @article{Kossenkov2009, Abstract = {We explore a number of matrix factorization methods in terms of their ability to identify signatures of biological processes in a large gene expression study. We focus on the ability of these methods to find signatures in terms of gene ontology enhancement and on the interpretation of these signatures in the samples. Two Bayesian approaches, Bayesian Decomposition (BD) and Bayesian Factor Regression Modeling (BFRM), perform best. Differences in the strength of the signatures between the samples suggest that BD will be most useful for systems modeling and BFRM for biomarker discovery.}, Address = {The Wistar Institute, Philadelphia, Pennsylvania, USA.}, Au = {Kossenkov, AV and Ochs, MF}, Author = {Kossenkov, A.V. 
and Ochs, M.F.}, Crdt = {2009/11/10 06:00}, Da = {20091109}, Date-Added = {2010-02-02 12:33:48 -0500}, Date-Modified = {2010-02-02 15:55:21 -0500}, Dcom = {20100111}, Doi = {10.1016/S0076-6879(09)67003-8}, Edat = {2009/11/10 06:00}, Issn = {1557-7988 (Electronic); 1557-7988 (Linking)}, Jid = {0212271}, Journal = {Methods Enzymol}, Jt = {Methods in enzymology}, Keywords = {Markov Chain Monte Carlo, matrix factorization}, Language = {eng}, Mh = {Algorithms; Bayes Theorem; Biological Processes/physiology; *Cluster Analysis; *Data Interpretation, Statistical; Gene Expression Profiling/*methods; Gene Regulatory Networks; Microarray Analysis/*methods; Pattern Recognition, Automated/methods; Saccharomyces cerevisiae/genetics/physiology}, Mhda = {2010/01/12 06:00}, Own = {NLM}, Pages = {59--77}, Pii = {S0076-6879(09)67003-8}, Pl = {United States}, Pmid = {19897089}, Pst = {ppublish}, Pt = {Journal Article}, Sb = {IM}, So = {Methods Enzymol. 2009;467:59-77.}, Stat = {MEDLINE}, Title = {Matrix factorization for recovery of biological processes from microarray data.}, Volume = {467}, Year = {2009}, Bdsk-Url-1 = {http://dx.doi.org/10.1016/S0076-6879(09)67003-8}} @article{Subramanian2005, Abstract = {Although genomewide RNA expression analysis has become a routine tool in biomedical research, extracting biological insight from such information remains a major challenge. Here, we describe a powerful analytical method called Gene Set Enrichment Analysis (GSEA) for interpreting gene expression data. The method derives its power by focusing on gene sets, that is, groups of genes that share common biological function, chromosomal location, or regulation. We demonstrate how GSEA yields insights into several cancer-related data sets, including leukemia and lung cancer. Notably, where single-gene analysis finds little similarity between two independent studies of patient survival in lung cancer, GSEA reveals many biological pathways in common. 
The GSEA method is embodied in a freely available software package, together with an initial database of 1,325 biologically defined gene sets.}, Address = {Broad Institute of Massachusetts Institute of Technology and Harvard, 320 Charles Street, Cambridge, MA 02141, USA.}, Au = {Subramanian, A and Tamayo, P and Mootha, VK and Mukherjee, S and Ebert, BL and Gillette, MA and Paulovich, A and Pomeroy, SL and Golub, TR and Lander, ES and Mesirov, JP}, Author = {Subramanian, A. and Tamayo, P. and Mootha, V.K. and Mukherjee, S. and Ebert, B.L. and Gillette, M.A. and Paulovich, A. and Pomeroy, S.L. and Golub, T.R. and Lander, E.S. and Mesirov, J.P.}, Cin = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15278-9. PMID: 16230612}, Crdt = {2005/10/04 09:00}, Da = {20051026}, Date-Added = {2010-02-01 16:29:02 -0500}, Date-Modified = {2010-02-02 15:59:33 -0500}, Dcom = {20051212}, Dep = {20050930}, Doi = {10.1073/pnas.0506580102}, Edat = {2005/10/04 09:00}, Issn = {0027-8424 (Print); 0027-8424 (Linking)}, Jid = {7505876}, Journal = {Proc Natl Acad Sci}, Jt = {Proceedings of the National Academy of Sciences of the United States of America}, Keywords = {gene set}, Language = {eng}, Lr = {20091118}, Mh = {Cell Line, Tumor; Female; Gene Expression Profiling/*methods; Genes, p53/physiology; Genome; Humans; Leukemia, Myeloid, Acute/genetics; Lung Neoplasms/genetics/mortality; Male; *Oligonucleotide Array Sequence Analysis; Precursor Cell Lymphoblastic Leukemia-Lymphoma/genetics}, Mhda = {2005/12/15 09:00}, Number = {43}, Oid = {NLM: PMC1239896}, Own = {NLM}, Pages = {15545--15550}, Phst = {2005/09/30 {$[$}aheadofprint{$]$}}, Pii = {0506580102}, Pl = {United States}, Pmc = {PMC1239896}, Pmid = {16199517}, Pst = {ppublish}, Pt = {Journal Article}, Sb = {IM}, So = {Proc Natl Acad Sci U S A. 2005 Oct 25;102(43):15545-50. 
Epub 2005 Sep 30.}, Stat = {MEDLINE}, Title = {Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles.}, Volume = {102}, Year = {2005}, Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0506580102}} @article{Draghici2003, Abstract = {Onto-Tools is a set of four seamlessly integrated databases: Onto-Express, Onto-Compare, Onto-Design and Onto-Translate. Onto-Express is able to automatically translate lists of genes found to be differentially regulated in a given condition into functional profiles characterizing the impact of the condition studied upon various biological processes and pathways. OE constructs functional profiles (using Gene Ontology terms) for the following categories: biochemical function, biological process, cellular role, cellular component, molecular function and chromosome location. Statistical significance values are calculated for each category. Once the initial exploratory analysis identified a number of relevant biological processes, specific mechanisms of interactions can be hypothesized for the conditions studied. Currently, many commercial arrays are available for the investigation of specific mechanisms. Each such array is characterized by a biological bias determined by the extent to which the genes present on the array represent specific pathways. Onto-Compare is a tool that allows efficient comparisons of any sets of commercial or custom arrays. Using Onto-Compare, a researcher can determine quickly which array, or set of arrays, covers best the hypotheses studied. In many situations, no commercial arrays are available for specific biological mechanisms. Onto-Design is a tool that allows the user to select genes that represent given functional categories. Onto-Translate allows the user to translate easily lists of accession numbers, UniGene clusters and Affymetrix probes into one another. All tools above are seamlessly integrated. 
The Onto-Tools are available online at http://vortex.cs.wayne.edu/Projects.html.},
  Address = {Department of Computer Science, Wayne State University, 431 State Hall, Detroit, MI 48202, USA. sod@cs.wayne.edu},
  Au = {Draghici, S and Khatri, P and Bhavsar, P and Shah, A and Krawetz, SA and Tainsky, MA},
  Author = {Draghici, S. and Khatri, P. and Bhavsar, P. and Shah, A. and Krawetz, S. A. and Tainsky, M. A.},
  Crdt = {2003/06/26 05:00},
  Da = {20030625},
  Date-Added = {2010-02-01 16:28:22 -0500},
  Date-Modified = {2010-02-02 15:56:55 -0500},
  Dcom = {20030818},
  Edat = {2003/06/26 05:00},
  Gr = {R01-NS045207-01/NS/NINDS NIH HHS/United States; R21-EB000990-01/EB/NIBIB NIH HHS/United States},
  Issn = {1362-4962 (Electronic); 1362-4962 (Linking)},
  Jid = {0411011},
  Journal = {Nucleic Acids Research},
  Jt = {Nucleic acids research},
  Keywords = {gene set},
  Language = {eng},
  Lr = {20091118},
  Mh = {Databases, Nucleic Acid; Gene Expression Profiling/*methods; Internet; Oligonucleotide Array Sequence Analysis/*methods; Proteins/genetics/physiology; *Software; Systems Integration},
  Mhda = {2003/08/19 05:00},
  Number = {13},
  Oid = {NLM: PMC169030},
  Own = {NLM},
  Pages = {3775--3781},
  Pl = {England},
  Pmc = {PMC169030},
  Pmid = {12824416},
  Pst = {ppublish},
  Pt = {Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, Non-P.H.S.; Research Support, U.S. Gov't, P.H.S.},
  Rn = {0 (Proteins)},
  Sb = {IM},
  So = {Nucleic Acids Res. 2003 Jul 1;31(13):3775-81.},
  Stat = {MEDLINE},
  Title = {{Onto-Tools}, the toolkit of the modern biologist: {Onto-Express}, {Onto-Compare}, {Onto-Design} and {Onto-Translate}.},
  Volume = {31},
  Year = {2003}}

%% Part of a book. The title previously repeated its first clause twice;
%% it now holds the book title once. No Chapter field is present, so the
%% contribution is identified by its page range only.
@inbook{Ochs2006,
  Address = {London},
  Author = {Ochs, M. F.},
  Date-Added = {2010-01-08 14:45:36 -0500},
  Date-Modified = {2010-01-08 14:48:01 -0500},
  Editor = {Parmigiani, G. and Garrett, E. S. and Irizarry, R. A. and Zeger, S. L.},
  Pages = {388--408},
  Publisher = {Springer-Verlag},
  Series = {Statistics for Biology and Health},
  Title = {The Analysis of Gene Expression Data: Methods and Software},
  Year = {2006}}

@article{Bidaut2006,
  Abstract = {BACKGROUND: As numerous diseases involve errors in signal transduction, modern therapeutics often target proteins involved in cellular signaling. Interpretation of the activity of signaling pathways during disease development or therapeutic intervention would assist in drug development, design of therapy, and target identification. Microarrays provide a global measure of cellular response, however linking these responses to signaling pathways requires an analytic approach tuned to the underlying biology. An ongoing issue in pattern recognition in microarrays has been how to determine the number of patterns (or clusters) to use for data interpretation, and this is a critical issue as measures of statistical significance in gene ontology or pathways rely on proper separation of genes into groups. RESULTS: Here we introduce a method relying on gene annotation coupled to decompositional analysis of global gene expression data that allows us to estimate specific activity on strongly coupled signaling pathways and, in some cases, activity of specific signaling proteins. We demonstrate the technique using the Rosetta yeast deletion mutant data set, decompositional analysis by Bayesian Decomposition, and annotation analysis using ClutrFree. We determined from measurements of gene persistence in patterns across multiple potential dimensionalities that 15 basis vectors provides the correct dimensionality for interpreting the data.
Using gene ontology and data on gene regulation in the Saccharomyces Genome Database, we identified the transcriptional signatures of several cellular processes in yeast, including cell wall creation, ribosomal disruption, chemical blocking of protein synthesis, and, critically, individual signatures of the strongly coupled mating and filamentation pathways. CONCLUSION: This works demonstrates that microarray data can provide downstream indicators of pathway activity either through use of gene ontology or transcription factor databases. This can be used to investigate the specificity and success of targeted therapeutics as well as to elucidate signaling activity in normal and disease processes.}, Address = {Fox Chase Cancer Center, 333 Cottman Avenue, Philadelphia, PA 19111, USA. ghbidaut@pcbi.upenn.edu}, Au = {Bidaut, G and Suhre, K and Claverie, JM and Ochs, MF}, Author = {Bidaut, G. and Suhre, K. and Claverie, J.-M. and Ochs, M.F.}, Da = {20060327}, Date-Added = {2010-01-08 14:39:10 -0500}, Date-Modified = {2010-01-08 14:39:10 -0500}, Dcom = {20060426}, Dep = {20060228}, Doi = {10.1186/1471-2105-7-99}, Edat = {2006/03/02 09:00}, Gr = {CA06927/CA/United States NCI; LM008309/LM/United States NLM}, Issn = {1471-2105 (Electronic)}, Jid = {100965194}, Journal = {BMC Bioinformatics}, Jt = {BMC bioinformatics}, Keywords = {Bayesian, Markov Chain Monte Carlo, signaling network, microarray, k25}, Language = {eng}, Lr = {20071114}, Mh = {Algorithms; Computer Simulation; Gene Expression Profiling/*methods; *Models, Biological; Oligonucleotide Array Sequence Analysis/*methods; Pattern Recognition, Automated/*methods; Saccharomyces cerevisiae Proteins/genetics/*metabolism; Signal Transduction/*physiology; Transcription Factors/genetics/*metabolism}, Mhda = {2006/04/28 09:00}, Own = {NLM}, Pages = {99}, Phst = {2005/09/22 {$[$}received{$]$}; 2006/02/28 {$[$}accepted{$]$}; 2006/02/28 {$[$}aheadofprint{$]$}}, Pii = {1471-2105-7-99}, Pl = {England}, Pmc = {PMC1413561}, Pmid = 
{16507110}, Pst = {epublish}, Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't}, Pubm = {Electronic}, Rn = {0 (Saccharomyces cerevisiae Proteins); 0 (Transcription Factors)}, Sb = {IM}, So = {BMC Bioinformatics. 2006 Feb 28;7:99.}, Stat = {MEDLINE}, Title = {Determination of strongly overlapping signaling activity from microarray data.}, Volume = {7}, Year = {2006}, Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2105-7-99}} @article{Kossenkov2007a, Abstract = {Many biological processes rely on remodeling of the transcriptional response of cells through activation of transcription factors. Although determination of the activity level of transcription factors from microarray data can provide insight into developmental and disease processes, it requires careful analysis because of the multiple regulation of genes. We present a novel approach that handles both the assignment of genes to multiple patterns, as required by multiple regulation, and the linking of genes in prior probability distributions according to their known transcriptional regulators. We demonstrate the power of this approach in simulations and by application to yeast cell cycle and deletion mutant data. The results of simulations in the presence of increasing noise showed improved recovery of patterns in terms of chi2 fit. Analysis of the yeast data led to improved inference of biologically meaningful groups in comparison to other techniques, as demonstrated with ROC analysis. The new algorithm provides an approach for estimating the levels of transcription factor activity from microarray data, and therefore provides insights into biological response.}, Address = {The Wistar Institute, Philadelphia, PA, USA.}, Au = {Kossenkov, AV and Peterson, AJ and Ochs, MF}, Author = {A.V. Kossenkov and A.J. Peterson and M.F. 
Ochs}, Da = {20071003}, Date-Added = {2010-01-08 14:39:10 -0500}, Date-Modified = {2010-01-08 14:39:10 -0500}, Dcom = {20071102}, Edat = {2007/10/04 09:00}, Gr = {CA06973/CA/United States NCI; LM008309/LM/United States NLM}, Issn = {0926-9630 (Print)}, Jid = {9214582}, Journal = {Stud Health Technol Inform}, Jt = {Studies in health technology and informatics}, Keywords = {Bayesian, Markov Chain Monte Carlo, microarray}, Language = {eng}, Lr = {20080710}, Mh = {*Algorithms; Bayes Theorem; Computational Biology; *Gene Expression Regulation; Markov Chains; Models, Genetic; Monte Carlo Method; *Oligonucleotide Array Sequence Analysis; ROC Curve; Transcription Factors/*metabolism; Transcription, Genetic; Yeasts/genetics}, Mhda = {2007/11/06 09:00}, Number = {Pt 2}, Own = {NLM}, Pages = {1250--1254}, Pl = {Netherlands}, Pmid = {17911915}, Pst = {ppublish}, Pt = {Journal Article; Research Support, N.I.H., Extramural}, Pubm = {Print}, Rn = {0 (Transcription Factors)}, Sb = {T}, So = {Stud Health Technol Inform. 2007;129(Pt 2):1250-4.}, Stat = {MEDLINE}, Title = {Determining transcription factor activity from microarray data using {Bayesian Markov chain Monte Carlo} sampling.}, Volume = {129}, Year = {2007}} @article{Moloshok2002, Abstract = {MOTIVATION: Microarray and gene chip technology provide high throughput tools for measuring gene expression levels in a variety of circumstances, including cellular response to drug treatment, cellular growth and development, tumorigenesis, among many other processes. In order to interpret the large data sets generated in experiments, data analysis techniques that consider biological knowledge during analysis will be extremely useful. We present here results showing the application of such a tool to expression data from yeast cell cycle experiments. 
RESULTS: Originally developed for spectroscopic analysis, Bayesian Decomposition (BD) includes two features which make it useful for microarray data analysis: the ability to assign genes to multiple coexpression groups and the ability to encode biological knowledge into the system. Here we demonstrate the ability of the algorithm to provide insight into the yeast cell cycle, including identification of five temporal patterns tied to cell cycle phases as well as the identification of a pattern tied to an approximately 40 min cell cycle oscillator. The genes are simultaneously assigned to the patterns, including partial assignment to multiple patterns when this is required to explain the expression profile. AVAILABILITY: The application is available free to academic users under a material transfer agreement. Go to http://bioinformatics.fccc.edu/ for more details.}, Address = {Bioinformatics Working Group, Fox Chase Cancer Center, Philadelphia, PA 19111, USA. td_moloshok@fccc.edu}, Au = {Moloshok, TD and Klevecz, RR and Grant, JD and Manion, FJ and Speier WF, 4th and Ochs, MF}, Author = {Moloshok, T.D. and Klevecz, R.R. and Grant, J.D. and Manion, F.J. and Speier IV, W.F. 
and Ochs, M.F.}, Da = {20020517}, Date-Added = {2010-01-08 14:39:10 -0500}, Date-Modified = {2010-01-08 14:39:10 -0500}, Dcom = {20021108}, Edat = {2002/05/23 10:00}, Gr = {CA06927/CA/United States NCI}, Issn = {1367-4803 (Print)}, Jid = {9808944}, Journal = {Bioinformatics}, Jt = {Bioinformatics (Oxford, England)}, Keywords = {Bayesian, signaling network, microarray}, Language = {eng}, Lr = {20071114}, Mh = {*Algorithms; *Bayes Theorem; Cell Cycle/genetics; Databases, Genetic; Gene Expression Regulation; Genome, Fungal; Markov Chains; *Models, Genetic; *Models, Statistical; Monte Carlo Method; Oligonucleotide Array Sequence Analysis/*methods; Pattern Recognition, Automated; Periodicity; Reproducibility of Results; Saccharomyces cerevisiae/genetics; Sensitivity and Specificity}, Mhda = {2002/11/26 04:00}, Number = {4}, Own = {NLM}, Pages = {566--575}, Pl = {England}, Pmid = {12016054}, Pst = {ppublish}, Pt = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't; Research Support, U.S. Gov't, P.H.S.}, Pubm = {Print}, Sb = {IM}, So = {Bioinformatics. 
2002 Apr;18(4):566-75.}, Stat = {MEDLINE}, Title = {Application of {B}ayesian decomposition for analysing microarray data.}, Volume = {18}, Year = {2002}, Bdsk-File-1 = {YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAkYAAAAAAkYAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMPjDF1IKwAABRay/hVNb2xvc2hva19CRF9ZZWFzdC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFFrL/xebg41BERiBDQVJPAAIACQAACSAAAAAAAAAAAAAAAAAAAAABMgAAEAAIAADD41KtAAAAEQAIAADF5xkjAAAAAQAoBRay/gUWsv0COfpEAjlFGgAUHYQAFBk5AA/qXAAKpBIACqQRAAB63AACAIVNYWNpbnRvc2ggSEQ6VXNlcnM6ZWpmZXJ0aWc6TGlicmFyeTpNYWlsOklNQVAtZWpmZXJ0aWdAbWFpbC5sIzE0MTkzOS5vcmc6SU5CT1g6T2Nocy5pbWFwbWJveDpBdHRhY2htZW50czozMDg3OjI6TW9sb3Nob2tfQkRfWWVhc3QucGRmAAAOACwAFQBNAG8AbABvAHMAaABvAGsAXwBCAEQAXwBZAGUAYQBzAHQALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAHpVc2Vycy9lamZlcnRpZy9MaWJyYXJ5L01haWwvSU1BUC1lamZlcnRpZ0BtYWlsLmxpcXVpZGRpcnQub3JnL0lOQk9YL09jaHMuaW1hcG1ib3gvQXR0YWNobWVudHMvMzA4Ny8yL01vbG9zaG9rX0JEX1llYXN0LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBxLi4vLi4vTGlicmFyeS9NYWlsL0lNQVAtZWpmZXJ0aWdAbWFpbC5saXF1aWRkaXJ0Lm9yZy9JTkJPWC9PY2hzLmltYXBtYm94L0F0dGFjaG1lbnRzLzMwODcvMi9Nb2xvc2hva19CRF9ZZWFzdC5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAC6gLsAvEC+gMFAwkDFwMeAycDmwOgA6MDsAO1AAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA8c=}} @article{Ochs1999, Abstract = {A frequent problem in analysis is the need to find two matrices, closely related to the underlying measurement process, which when multiplied together reproduce the matrix of data points. 
Such problems arise throughout science, for example, in imaging where both the calibration of the sensor and the true scene may be unknown and in localized spectroscopy where multiple components may be present in varying amounts in any spectrum. Since both matrices are unknown, such a decomposition is a bilinear problem. We report here a solution to this problem for the case in which the decomposition results in matrices with elements drawn from positive additive distributions. We demonstrate the power of the methodology on chemical shift images (CSI). The new method, Bayesian spectral decomposition (BSD), reduces the CSI data to a small number of basis spectra together with their localized amplitudes. We apply this new algorithm to a 19F nonlocalized study of the catabolism of 5-fluorouracil in human liver, 31P CSI studies of a human head and calf muscle, and simulations which show its strengths and limitations. In all cases, the dataset, viewed as a matrix with rows containing the individual NMR spectra, results from the multiplication of a matrix of generally nonorthogonal basis spectra (the spectral matrix) by a matrix of the amplitudes of each basis spectrum in the individual voxels (the amplitude matrix). The results show that BSD can simultaneously determine both the basis spectra and their distribution. In principle, BSD should solve this bilinear problem for any dataset which results from multiplication of matrices representing positive additive distributions if the data overdetermine the solutions.},
  Address = {NMR and Medical Spectroscopy, Fox Chase Cancer Center, Philadelphia, PA, USA.},
  Au = {Ochs, MF and Stoyanova, RS and Arias-Mendoza, F and Brown, TR},
  Author = {Ochs, M. F. and Stoyanova, R. S. and Arias-Mendoza, F. and Brown, T. R.},
  Ci = {Copyright 1999 Academic Press.},
  Da = {19990331},
  Date-Added = {2010-01-08 14:39:10 -0500},
  Date-Modified = {2010-01-08 14:39:10 -0500},
  Dcom = {19990331},
  Doi = {10.1006/jmre.1998.1639},
  Edat = {1999/03/04},
  Gr = {CA41078/CA/United States NCI; CA62556/CA/United States NCI},
  Issn = {1090-7807 (Print)},
  Jid = {9707935},
  Journal = {J Magn Reson},
  Jt = {Journal of magnetic resonance (San Diego, Calif. : 1997)},
  Keywords = {Markov Chain Monte Carlo, Bayesian, k25},
  Language = {eng},
  Lr = {20071114},
  Mh = {Adenosine Triphosphate/analysis; Bayes Theorem; Brain/metabolism; Fluorouracil/*metabolism; Humans; Image Processing, Computer-Assisted; Liver/*metabolism; Magnesium/analysis; Magnetic Resonance Spectroscopy/*methods; Muscle, Skeletal/metabolism},
  Mhda = {1999/03/04 00:01},
  Number = {1},
  Own = {NLM},
  Pages = {161--176},
  Pii = {S1090-7807(98)91639-1},
  Pl = {UNITED STATES},
  Pmid = {10053145},
  Pst = {ppublish},
  Pt = {Comparative Study; Journal Article; Research Support, U.S. Gov't, P.H.S.},
  Pubm = {Print},
  Rn = {51-21-8 (Fluorouracil); 56-65-5 (Adenosine Triphosphate); 7439-95-4 (Magnesium)},
  Sb = {IM},
  So = {J Magn Reson. 1999 Mar;137(1):161-76.},
  Stat = {MEDLINE},
  Title = {A new method for spectral decomposition using a bilinear {Bayesian} approach.},
  Volume = {137},
  Year = {1999},
  Bdsk-Url-1 = {http://dx.doi.org/10.1006/jmre.1998.1639}}

%% Plummer2003: conference paper describing JAGS (Workshop on Distributed
%% Statistical Computing, Vienna). Month is the standard mar macro with the
%% meeting days concatenated, so styles can localise or abbreviate the month.
@conference{Plummer2003,
  Address = {Vienna, Austria},
  Author = {M. Plummer},
  Booktitle = {Proceedings of the 3rd International Workshop on Distributed Statistical Computing},
  Date-Added = {2010-01-08 14:38:05 -0500},
  Date-Modified = {2010-02-02 15:47:49 -0500},
  Editor = {K. Hornik and F. Leisch and A. Zeileis},
  Keywords = {Markov Chain Monte Carlo},
  Month = mar # {~20--22},
  Title = {{JAGS}: A program for analysis of {Bayesian} graphical models using {Gibbs} sampling},
  Year = {2003}}

%% Ochs2009: journal article on treatment-induced signaling-pathway changes in
%% gastrointestinal stromal tumors (PubMed export; extra fields are ignored by styles).
@article{Ochs2009,
  Abstract = {Cell signaling plays a central role in the etiology of cancer.
Numerous therapeutics in use or under development target signaling proteins; however, off-target effects often limit assignment of positive clinical response to the intended target. As direct measurements of signaling protein activity are not generally feasible during treatment, there is a need for more powerful methods to determine if therapeutics inhibit their targets and when off-target effects occur. We have used the Bayesian Decomposition algorithm and data on transcriptional regulation to create a novel methodology, Differential Expression for Signaling Determination (DESIDE), for inferring signaling activity from microarray measurements. We applied DESIDE to deduce signaling activity in gastrointestinal stromal tumor cell lines treated with the targeted therapeutic imatinib mesylate (Gleevec). We detected the expected reduced activity in the KIT pathway, as well as unexpected changes in the p53 pathway. Pursuing these findings, we have determined that imatinib-induced DNA damage is responsible for the increased activity of p53, identifying a novel off-target activity for this drug. We then used DESIDE on data from resected, post-imatinib treatment tumor samples and identified a pattern in these tumors similar to that at late time points in the cell lines, and this pattern correlated with initial clinical response. The pattern showed increased activity of ETS domain-containing protein Elk-1 and signal transducers and activators of transcription 3 transcription factors, which are associated with the growth of side population cells. DESIDE infers the global reprogramming of signaling networks during treatment, permitting treatment modification that leverages ongoing drug development efforts, which is crucial for personalized medicine.},
  Address = {Division of Oncology Biostatistics and Bioinformatics, Johns Hopkins University, Baltimore, Maryland 21205, USA. mfo@jhu.edu},
  Au = {Ochs, MF and Rink, L and Tarn, C and Mburu, S and Taguchi, T and Eisenberg, B and Godwin, AK},
  Author = {Ochs, M. F. and Rink, L. and Tarn, C. and Mburu, S. and Taguchi, T. and Eisenberg, B. and Godwin, A. K.},
  Crdt = {2009/11/12 06:00},
  Da = {20091204},
  Date-Added = {2010-01-08 14:35:07 -0500},
  Date-Modified = {2010-02-02 15:57:46 -0500},
  Dcom = {20091221},
  Dep = {20091110},
  Doi = {10.1158/0008-5472.CAN-09-1709},
  Edat = {2009/11/12 06:00},
  Gr = {CA009035/CA/NCI NIH HHS/United States; CA106588/CA/NCI NIH HHS/United States; CA21661/CA/NCI NIH HHS/United States; LM009382/LM/NLM NIH HHS/United States},
  Issn = {1538-7445 (Electronic); 1538-7445 (Linking)},
  Jid = {2984705R},
  Journal = {Cancer Res},
  Jt = {Cancer research},
  Language = {eng},
  Mh = {Antineoplastic Agents/*pharmacology; Cell Line, Tumor; DNA Damage; Gastrointestinal Stromal Tumors/*drug therapy/*genetics/metabolism; Gene Expression Profiling; Humans; Piperazines/*pharmacology; Pyrimidines/*pharmacology; RNA, Messenger/biosynthesis/genetics; STAT3 Transcription Factor/metabolism; Signal Transduction; Tumor Suppressor Protein p53/genetics/metabolism; ets-Domain Protein Elk-1/metabolism},
  Mhda = {2009/12/22 06:00},
  Mid = {NIHMS149886},
  Number = {23},
  Oid = {NLM: NIHMS149886 {$[$}Available on 12/01/10{$]$}; NLM: PMC2789202 {$[$}Available on 12/01/10{$]$}},
  Own = {NLM},
  Pages = {9125--9132},
  Phst = {2009/11/10 {$[$}aheadofprint{$]$}},
  Pii = {0008-5472.CAN-09-1709},
  Pl = {United States},
  Pmc = {PMC2789202},
  Pmcr = {2010/12/01},
  Pmid = {19903850},
  Pst = {ppublish},
  Pt = {Journal Article; Research Support, N.I.H., Extramural; Research Support, Non-U.S. Gov't},
  Rn = {0 (Antineoplastic Agents); 0 (ELK1 protein, human); 0 (Piperazines); 0 (Pyrimidines); 0 (RNA, Messenger); 0 (STAT3 Transcription Factor); 0 (STAT3 protein, human); 0 (TP53 protein, human); 0 (Tumor Suppressor Protein p53); 0 (ets-Domain Protein Elk-1); 152459-95-5 (imatinib)},
  Sb = {IM},
  So = {Cancer Res. 2009 Dec 1;69(23):9125-32. Epub 2009 Nov 10.},
  Stat = {MEDLINE},
  Title = {Detection of treatment-induced changes in signaling pathways in gastrointestinal stromal tumors using transcriptomic data.},
  Volume = {69},
  Year = {2009},
  Bdsk-Url-1 = {http://dx.doi.org/10.1158/0008-5472.CAN-09-1709}}

%% Skilling1998: paper in the proceedings of the 17th International Workshop on
%% Maximum Entropy and Bayesian Methods (Kluwer Academic Publishers).
@conference{Skilling1998,
  Address = {Dordrecht/Boston/London},
  Author = {J. Skilling},
  Booktitle = {Maximum Entropy and Bayesian Methods, Proceedings of the 17th International Workshop on Maximum Entropy and Bayesian Methods of Statistical Analysis},
  Date-Added = {2010-01-08 14:27:23 -0500},
  Date-Modified = {2010-01-08 14:29:16 -0500},
  Editor = {G. J. Erickson and J. T. Rychert and C. R. Smith},
  Publisher = {Kluwer Academic Publishers},
  Title = {Massive inference and maximum entropy},
  Year = {1998}}

%% Sibisi1997: journal article; the Bdsk-File-1 value that follows is a
%% BibDesk-generated base64 file alias and must be kept byte-for-byte.
@article{Sibisi1997,
  Author = {S. Sibisi and J. Skilling},
  Date-Added = {2010-01-08 13:50:30 -0500},
  Date-Modified = {2010-01-08 13:50:30 -0500},
  Journal = {Journal of the Royal Statistical Society, B},
  Keywords = {Bayesian, probability},
  Number = {1},
  Pages = {217--235},
  Title = {Prior distributions on measure space},
  Volume = {59},
  Year = {1997},
  Bdsk-File-1 =
{YnBsaXN0MDDUAQIDBAUIJidUJHRvcFgkb2JqZWN0c1gkdmVyc2lvblkkYXJjaGl2ZXLRBgdUcm9vdIABqAkKFRYXGyIjVSRudWxs0wsMDQ4RFFpOUy5vYmplY3RzV05TLmtleXNWJGNsYXNzog8QgASABqISE4ACgAOAB1lhbGlhc0RhdGFccmVsYXRpdmVQYXRo0hgNGRpXTlMuZGF0YU8RAfIAAAAAAfIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAMkCCRlIKwAAAAnLqh9TaWJpc2lTa2lsbGluZ19KUm95YWwjOUNDRTEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACczhxKuwAwAAAAAAAAAA/////wAACSAAAAAAAAAAAAAAAAAAAAAKUmVmZXJlbmNlcwAQAAgAAMkCT2kAAAARAAgAAMSr6EMAAAABABAACcuqAAhtRgAITPkAAJMnAAIAUE1hY2ludG9zaCBIRDpVc2VyczplamZlcnRpZzpEb2N1bWVudHM6UmVmZXJlbmNlczpTaWJpc2lTa2lsbGluZ19KUm95YWwjOUNDRTEucGRmAA4ATgAmAFMAaQBiAGkAcwBpAFMAawBpAGwAbABpAG4AZwBfAEoAUgBvAHkAYQBsAFMAdABhAHQAUwBvAGMAQgBfADEAOQA5ADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAEpVc2Vycy9lamZlcnRpZy9Eb2N1bWVudHMvUmVmZXJlbmNlcy9TaWJpc2lTa2lsbGluZ19KUm95YWxTdGF0U29jQl8xOTk3LnBkZgATAAEvAAAVAAIAD///AACABdIcHR4fWCRjbGFzc2VzWiRjbGFzc25hbWWjHyAhXU5TTXV0YWJsZURhdGFWTlNEYXRhWE5TT2JqZWN0XxBiLi4vLi4vLi4vLi4vLi4vLi4vLi4vLi4vVXNlcnMvZWpmZXJ0aWcvRG9jdW1lbnRzL1JlZmVyZW5jZXMvU2liaXNpU2tpbGxpbmdfSlJveWFsU3RhdFNvY0JfMTk5Ny5wZGbSHB0kJaIlIVxOU0RpY3Rpb25hcnkSAAGGoF8QD05TS2V5ZWRBcmNoaXZlcgAIABEAFgAfACgAMgA1ADoAPABFAEsAUgBdAGUAbABvAHEAcwB2AHgAegB8AIYAkwCYAKAClgKYAp0CpgKxArUCwwLKAtMDOAM9A0ADTQNSAAAAAAAAAgEAAAAAAAAAKAAAAAAAAAAAAAAAAAAAA2Q=}}