% ---------------------------------------------------------------------------
% Bibliography entries: model-based clustering (MCLUST), GO enrichment
% (GOstats), network visualisation (Cytoscape), and independent component
% analysis (ICA) applied to microarray / neuroimaging data.
%
% Conventions used for the entries below:
%   * one field per line; names in "von Last, First" form
%   * braces protect only acronyms / proper nouns in titles
%   * accented letters are written as BibTeX special characters, e.g. {\"o}
%   * month uses the standard three-letter macros (jan, feb, ...)
%   * owner, timestamp, __markedentry, medline-pst and pii are reference
%     manager bookkeeping fields; standard styles ignore unknown fields
% ---------------------------------------------------------------------------

% MCLUST version 3 software manual (technical report no. 504).
@TECHREPORT{MclustSoftwareManual2006,
  author      = {Fraley, C. and Raftery, A. E.},
  title       = {{MCLUST} Version 3 for {R}: Normal Mixture Modeling and Model-Based Clustering},
  institution = {Department of Statistics, University of Washington},
  year        = {2006},
  type        = {Technical Report},
  number      = {504},
  address     = {Seattle, WA},
  note        = {revised 2009}
}

% Prognostic role of the B-cell metagene in node-negative breast cancer.
@ARTICLE{Schmidt2008,
  author        = {Schmidt, Marcus and B{\"o}hm, Daniel and von T{\"o}rne, Christian
                   and Steiner, Eric and Puhl, Alexander and Pilch, Henryk
                   and Lehr, Hans-Anton and Hengstler, Jan G and K{\"o}lbl, Heinz
                   and Gehrmann, Mathias},
  title         = {The humoral immune system has a key prognostic impact in node-negative breast cancer},
  journal       = {Cancer Res},
  year          = {2008},
  volume        = {68},
  pages         = {5405--5413},
  number        = {13},
  month         = jul,
  __markedentry = {[anne:6]},
  abstract      = {Estrogen receptor (ER) expression and proliferative activity are established prognostic factors in breast cancer. In a search for additional prognostic motifs, we analyzed the gene expression patterns of 200 tumors of patients who were not treated by systemic therapy after surgery using a discovery approach. After performing hierarchical cluster analysis, we identified coregulated genes related to the biological process of proliferation, steroid hormone receptor expression, as well as B-cell and T-cell infiltration. We calculated metagenes as a surrogate for all genes contained within a particular cluster and visualized the relative expression in relation to time to metastasis with principal component analysis. Distinct patterns led to the hypothesis of a prognostic role of the immune system in tumors with high expression of proliferation-associated genes. In multivariate Cox regression analysis, the proliferation metagene showed a significant association with metastasis-free survival of the whole discovery cohort [hazard ratio (HR), 2.20; 95\% confidence interval (95\% CI), 1.40-3.46]. The B-cell metagene showed additional independent prognostic information in carcinomas with high proliferative activity (HR, 0.66; 95\% CI, 0.46-0.97). A prognostic influence of the B-cell metagene was independently confirmed by multivariate analysis in a first validation cohort enriched for high-grade tumors (n = 286; HR, 0.78; 95\% CI, 0.62-0.98) and a second validation cohort enriched for younger patients (n = 302; HR, 0.83; 95\% CI, 0.7-0.97). Thus, we could show in three cohorts of untreated, node-negative breast cancer patients that the humoral immune system plays a pivotal role in metastasis-free survival of carcinomas of the breast.},
  doi           = {10.1158/0008-5472.CAN-07-5206},
  institution   = {Department of Obstetrics and Gynecology, Medical School, Johannes Gutenberg University, Mainz, Germany.},
  keywords      = {Adult; Aged; Aged, 80 and over; Antibody Formation, physiology; Breast Neoplasms, diagnosis/genetics/immunology/pathology; Carcinoma, diagnosis/genetics/immunology/pathology; Cell Proliferation; Cluster Analysis; Cohort Studies; Female; Gene Expression Profiling; Gene Expression Regulation, Neoplastic; Genes, Neoplasm; Humans; Lymph Nodes, immunology/pathology; Lymphatic Metastasis; Middle Aged; Neutrophil Infiltration, genetics; Oligonucleotide Array Sequence Analysis; Prognosis},
  language      = {eng},
  medline-pst   = {ppublish},
  owner         = {anne},
  pii           = {68/13/5405},
  pmid          = {18593943},
  timestamp     = {2012.12.13},
  url           = {http://dx.doi.org/10.1158/0008-5472.CAN-07-5206}
}

% Model-based clustering reference paper (JASA).
@ARTICLE{FraleyRaftery2002,
  author  = {Fraley, C. and Raftery, A. E.},
  title   = {Model-Based Clustering, Discriminant Analysis and Density Estimation},
  journal = {Journal of the American Statistical Association},
  year    = {2002},
  volume  = {97},
  pages   = {611--631}
}

% GOstats: hypergeometric testing of GO term association.
@ARTICLE{Falcon2007Using,
  author   = {Falcon, S. and Gentleman, R.},
  title    = {Using {GOstats} to test gene lists for {GO} term association},
  journal  = {Bioinformatics},
  year     = {2007},
  volume   = {23},
  pages    = {257--258},
  month    = jan,
  abstract = {MOTIVATION: Functional analyses based on the association of Gene Ontology (GO) terms to genes in a selected gene list are useful bioinformatic tools and the GOstats package has been widely used to perform such computations. In this paper we report significant improvements and extensions such as support for conditional testing. RESULTS: We discuss the capabilities of GOstats, a Bioconductor package written in R, that allows users to test GO terms for over or under-representation using either a classical hypergeometric test or a conditional hypergeometric that uses the relationships among GO terms to decorrelate the results. AVAILABILITY: GOstats is available as an R package from the Bioconductor project: http://bioconductor.org},
  pmid     = {17098774},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/content/full/23/2/257?view=long&pmid=17098774}
}

% ICA applied to microarray data (Genome Biology).
@ARTICLE{Lee2003Application,
  author      = {Lee, Su-In and Batzoglou, Serafim},
  title       = {Application of independent component analysis to microarrays},
  journal     = {Genome Biol},
  year        = {2003},
  volume      = {4},
  pages       = {R76},
  number      = {11},
  abstract    = {We apply linear and nonlinear independent component analysis (ICA) to project microarray data into statistically independent components that correspond to putative biological processes, and to cluster genes according to over- or under-expression in each component. We test the statistical significance of enrichment of gene annotations within clusters. ICA outperforms other leading methods, such as principal component analysis, k-means clustering and the Plaid model, in constructing functionally coherent clusters on microarray datasets from Saccharomyces cerevisiae, Caenorhabditis elegans and human.},
  doi         = {10.1186/gb-2003-4-11-r76},
  institution = {Department of Computer Science, Stanford University, Stanford, CA94305-9010, USA.},
  keywords    = {Algorithms; Animals; Caenorhabditis elegans; Cluster Analysis; Gene Expression Profiling; Humans; Models, Genetic; Oligonucleotide Array Sequence Analysis; Saccharomyces cerevisiae; Statistics as Topic, Independent Component Analysis},
  owner       = {abiton}, 
  pii         = {gb-2003-4-11-r76},
  pmid        = {14611662},
  timestamp   = {2010.11.09},
  url         = {http://dx.doi.org/10.1186/gb-2003-4-11-r76}
}

% Expression profiles of poor outcome in invasive bladder cancer.
% Percent signs in the abstract are escaped so the field is safe if it is
% ever passed through LaTeX.
@ARTICLE{Sanchez-Carbayo2006Defining,
  author   = {Sanchez-Carbayo, M. and Socci, N. D. and Lozano, J. and Saint, F. and Cordon-Cardo, C.},
  title    = {Defining molecular profiles of poor outcome in patients with invasive bladder cancer using oligonucleotide microarrays},
  journal  = {J. Clin. Oncol.},
  year     = {2006},
  volume   = {24},
  pages    = {778--789},
  month    = feb,
  abstract = {PURPOSE: Bladder cancer is a common malignancy characterized by a poor clinical outcome when tumors progress into invasive disease. We sought to define genetic signatures characteristic of aggressive clinical behavior in advanced bladder tumors. METHODS: Oligonucleotide arrays were utilized to analyze the transcript profiles of 105 bladder tumors: 33 superficial, 72 invasive lesions, and 52 normal urothelium. Hierarchical clustering and supervised algorithms were used to classify and stratify bladder tumors on the basis of stage, node metastases, and overall survival. Immunohistochemical analyses on bladder cancer tissue arrays (n = 294 cases) served to validate associations between marker expression, staging and outcome. RESULTS: Hierarchical clustering classified normal urothelium, superficial, and invasive tumors with 82.2\% accuracy, and stratified bladder tumors on the basis of clinical outcome. Predictive algorithms rendered an 89\%-correct rate for tumor staging using genes differentially expressed between superficial and invasive tumors. Accuracies of 82\% and 90\% were obtained for predicting overall survival when considering all patients with bladder cancer or only patients with invasive disease, respectively. A genetic profile consisting of 174 probes was identified in those patients with positive lymph nodes and poor survival. Two independent Global Test runs confirmed the robust association of this profile with lymph node metastases (P = 7.3(-13)) and overall survival (P = 1.9(-14)) simultaneously. Immunohistochemical analyses on tissue arrays sustained the significant association of synuclein with tumor staging and clinical outcome (P = .002). CONCLUSION: Gene profiling provides a genomic-based classification scheme of diagnostic and prognostic utility for stratifying advanced bladder cancer. Identification of this poor outcome profile could assist in selecting patients who may benefit from more aggressive therapeutic intervention.},
  url      = {http://jco.ascopubs.org/content/24/5/778.long}
}

% ICA versus PCA / clustering on breast cancer expression data.
@ARTICLE{Teschendorff2007Elucidating,
  author   = {Teschendorff, A. E. and Journee, M. and Absil, P. A. and Sepulchre, R. and Caldas, C.},
  title    = {Elucidating the altered transcriptional programs in breast cancer using independent component analysis},
  journal  = {{PLoS} Comput. Biol.},
  year     = {2007},
  volume   = {3},
  pages    = {e161},
  month    = aug,
  abstract = {The quantity of mRNA transcripts in a cell is determined by a complex interplay of cooperative and counteracting biological processes. Independent Component Analysis (ICA) is one of a few number of unsupervised algorithms that have been applied to microarray gene expression data in an attempt to understand phenotype differences in terms of changes in the activation/inhibition patterns of biological pathways. While the ICA model has been shown to outperform other linear representations of the data such as Principal Components Analysis (PCA), a validation using explicit pathway and regulatory element information has not yet been performed. We apply a range of popular ICA algorithms to six of the largest microarray cancer datasets and use pathway-knowledge and regulatory-element databases for validation. We show that ICA outperforms PCA and clustering-based methods in that ICA components map closer to known cancer-related pathways, regulatory modules, and cancer phenotypes. Furthermore, we identify cancer signalling and oncogenic pathways and regulatory modules that play a prominent role in breast cancer and relate the differential activation patterns of these to breast cancer phenotypes. Importantly, we find novel associations linking immune response and epithelial-mesenchymal transition pathways with estrogen receptor status and histological grade, respectively. In addition, we find associations linking the activity levels of biological pathways and transcription factors (NF1 and NFAT) with clinical outcome in breast cancer. ICA provides a framework for a more biologically relevant interpretation of genomewide transcriptomic data. Adopting ICA as the analysis tool of choice will help understand the phenotype-pathway relationship and thus help elucidate the molecular taxonomy of heterogeneous cancers and of other complex genetic diseases.},
  keywords = {Independent Component Analysis},
  url      = {http://www.ploscompbiol.org/article/info:doi/10.1371/journal.pcbi.0030161}
}

% ICA of microarray data in endometrial cancer.
@ARTICLE{Saidi2004Independent,
  author   = {Saidi, S. A. and Holland, C. M. and Kreil, D. P. and MacKay, D. J.
              and Charnock-Jones, D. S. and Print, C. G. and Smith, S. K.},
  title    = {Independent component analysis of microarray data in the study of endometrial cancer},
  journal  = {Oncogene},
  year     = {2004},
  volume   = {23},
  pages    = {6677--6683},
  month    = aug,
  abstract = {Gene microarray technology is highly effective in screening for differential gene expression and has hence become a popular tool in the molecular investigation of cancer. When applied to tumours, molecular characteristics may be correlated with clinical features such as response to chemotherapy. Exploitation of the huge amount of data generated by microarrays is difficult, however, and constitutes a major challenge in the advancement of this methodology. Independent component analysis (ICA), a modern statistical method, allows us to better understand data in such complex and noisy measurement environments. The technique has the potential to significantly increase the quality of the resulting data and improve the biological validity of subsequent analysis. We performed microarray experiments on 31 postmenopausal endometrial biopsies, comprising 11 benign and 20 malignant samples. We compared ICA to the established methods of principal component analysis (PCA), Cyber-T, and SAM. We show that ICA generated patterns that clearly characterized the malignant samples studied, in contrast to PCA. Moreover, ICA improved the biological validity of the genes identified as differentially expressed in endometrial carcinoma, compared to those found by Cyber-T and SAM. In particular, several genes involved in lipid metabolism that are differentially expressed in endometrial carcinoma were only found using this method. This report highlights the potential of ICA in the analysis of microarray data.},
  keywords = {Independent Component Analysis}
}

% Iterated ICA on tumor and time-series microarray data sets.
@ARTICLE{Frigyesi2006Independent,
  author   = {Frigyesi, A. and Veerla, S. and Lindgren, D. and Hoglund, M.},
  title    = {Independent component analysis reveals new and biologically significant structures in micro array data},
  journal  = {{BMC} Bioinformatics},
  year     = {2006},
  volume   = {7},
  pages    = {290},
  abstract = {BACKGROUND: An alternative to standard approaches to uncover biologically meaningful structures in micro array data is to treat the data as a blind source separation (BSS) problem. BSS attempts to separate a mixture of signals into their different sources and refers to the problem of recovering signals from several observed linear mixtures. In the context of micro array data, "sources" may correspond to specific cellular responses or to co-regulated genes. RESULTS: We applied independent component analysis (ICA) to three different microarray data sets; two tumor data sets and one time series experiment. To obtain reliable components we used iterated ICA to estimate component centrotypes. We found that many of the low ranking components indeed may show a strong biological coherence and hence be of biological significance. Generally ICA achieved a higher resolution when compared with results based on correlated expression and a larger number of gene clusters with significantly enriched for gene ontology (GO) categories. In addition, components characteristic for molecular subtypes and for tumors with specific chromosomal translocations were identified. ICA also identified more than one gene clusters significant for the same GO categories and hence disclosed a higher level of biological heterogeneity, even within coherent groups of genes. CONCLUSION: Although the ICA approach primarily detects hidden variables, these surfaced as highly correlated genes in time series data and in one instance in the tumor data. This further strengthens the biological relevance of latent variables detected by ICA.},
  keywords = {Independent Component Analysis},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/16762055}
}

% Icasso: reliability assessment of ICA components.
@ARTICLE{Himberg2004Validating,
  author      = {Himberg, Johan and Hyv{\"a}rinen, Aapo and Esposito, Fabrizio},
  title       = {Validating the independent components of neuroimaging time series via clustering and visualization},
  journal     = {Neuroimage},
  year        = {2004},
  volume      = {22},
  pages       = {1214--1222},
  number      = {3},
  month       = jul,
  abstract    = {Recently, independent component analysis (ICA) has been widely used in the analysis of brain imaging data. An important problem with most ICA algorithms is, however, that they are stochastic; that is, their results may be somewhat different in different runs of the algorithm. Thus, the outputs of a single run of an ICA algorithm should be interpreted with some reserve, and further analysis of the algorithmic reliability of the components is needed. Moreover, as with any statistical method, the results are affected by the random sampling of the data, and some analysis of the statistical significance or reliability should be done as well. Here we present a method for assessing both the algorithmic and statistical reliability of estimated independent components. The method is based on running the ICA algorithm many times with slightly different conditions and visualizing the clustering structure of the obtained components in the signal space. In experiments with magnetoencephalographic (MEG) and functional magnetic resonance imaging (fMRI) data, the method was able to show that expected components are reliable; furthermore, it pointed out components whose interpretation was not obvious but whose reliability should incite the experimenter to investigate the underlying technical or physical phenomena. The method is implemented in a software package called Icasso.},
  doi         = {10.1016/j.neuroimage.2004.03.027},
  institution = {Neural Networks Research Centre, Helsinki University of Technology, Helsinki, Finland.},
  keywords    = {Algorithms; Brain; Cluster Analysis; Data Interpretation, Statistical; Fingers; Humans; Image Processing, Computer-Assisted; Magnetic Resonance Imaging; Magnetoencephalography; Models, Neurological; Motor Activity; Software, Independent Component Analysis},
  owner       = {abiton},
  pii         = {S1053811904001661},
  pmid        = {15219593},
  timestamp   = {2010.11.10},
  url         = {http://dx.doi.org/10.1016/j.neuroimage.2004.03.027}
}

% Cytoscape protocol: integrating networks with expression data.
@ARTICLE{Cline2007Integration,
  author      = {Cline, Melissa S and Smoot, Michael and Cerami, Ethan
                 and Kuchinsky, Allan and Landys, Nerius and Workman, Chris
                 and Christmas, Rowan and Avila-Campilo, Iliana and Creech, Michael
                 and Gross, Benjamin and Hanspers, Kristina and Isserlin, Ruth
                 and Kelley, Ryan and Killcoyne, Sarah and Lotia, Samad
                 and Maere, Steven and Morris, John and Ono, Keiichiro
                 and Pavlovic, Vuk and Pico, Alexander R and Vailaya, Aditya
                 and Wang, Peng-Liang and Adler, Annette and Conklin, Bruce R
                 and Hood, Leroy and Kuiper, Martin and Sander, Chris
                 and Schmulevich, Ilya and Schwikowski, Benno and Warner, Guy J
                 and Ideker, Trey and Bader, Gary D},
  title       = {Integration of biological networks and gene expression data using {Cytoscape}},
  journal     = {Nat Protoc},
  year        = {2007},
  volume      = {2},
  pages       = {2366--2382},
  number      = {10},
  abstract    = {Cytoscape is a free software package for visualizing, modeling and analyzing molecular and genetic interaction networks. This protocol explains how to use Cytoscape to analyze the results of mRNA expression profiling, and other functional genomics and proteomics experiments, in the context of an interaction network obtained for genes of interest. Five major steps are described: (i) obtaining a gene or protein network, (ii) displaying the network using layout algorithms, (iii) integrating with gene expression and other functional attributes, (iv) identifying putative complexes and functional modules and (v) identifying enriched Gene Ontology annotations in the network. These steps provide a broad sample of the types of analyses performed by Cytoscape.},
  doi         = {10.1038/nprot.2007.324},
  institution = {Institut Pasteur, 25-28 rue du Docteur Roux, 75724 Paris cedex 15, France.},
  keywords    = {Computational Biology; Gene Expression Profiling; Gene Regulatory Networks; Genomics; Proteomics; RNA, Messenger; Software},
  owner       = {abiton},
  pii         = {nprot.2007.324},
  pmid        = {17947979},
  timestamp   = {2011.01.04},
  url         = {http://dx.doi.org/10.1038/nprot.2007.324}
}