## Copyright 2010 Laurent Jacob, Pierre Neuvial and Sandrine Dudoit. ## This file is part of DEGraph. ## DEGraph is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## DEGraph is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with DEGraph. If not, see <http://www.gnu.org/licenses/>. #########################################################################/** ## @RdocFunction testOneConnectedComponent ## ## @title "Applies a series of two-sample tests to a connected graph using various statistics" ## ## \description{ ## @get "title". ## } ## ## @synopsis ## ## \arguments{ ## \item{graph}{A \code{\link[=graph-class]{graph}} object.} ## \item{data}{A '@numeric @matrix (size: number 'p' of genes x number 'n' of samples) of gene expression.} ## \item{classes}{A @character @vector (length: 'n') of class assignments.} ## \item{...}{Further arguments to be passed to @see "laplacianFromA".} ## \item{prop}{A @numeric value, percentage of components retained for Fourier and PCA.} ## \item{verbose}{If @TRUE, extra information is output.} ## } ## ## \value{ ## A structured @list containing the p-values of the tests, the ## \code{\link[=graph-class]{graph}} object of the connected component and the number ## of retained Fourier dimensions. ## } ## ## @author ## ## \details{ ## This function performs the test, assuming that all genes ## in the graph are represented in the expression data set, ## in order not to have to modify the graph topology. ## ## Interaction signs are used if available in the graph ## ('getSignedGraph' is not called here, in order not to ## have to modify the graph topology.). ## ## The graph given as input has to have only one ## connex component. It can be retrieved from the output of ## @see "getConnectedComponentList". ## } ## ## \seealso{ ## @see "testOneGraph" ## @see "getConnectedComponentList" ## } ## ## @examples "../incl/graph.T2.test.Rex" ## ##*/######################################################################## testOneConnectedComponent <- function(graph, data, classes, ..., prop=0.2, verbose=FALSE) { ## - - - - - - - - - - - - - - - - - - - ## Validate arguments ## - - - - - - - - - - - - - - - - - - - ## Argument 'classes' classes <- Arguments$getNumerics(classes) ll <- length(unique(classes)) if (ll != 2) { throw("Number of classes should be 2, not ", ll) } rm(ll) ## Argument 'data' cls <- class(data)[1] if (cls != "matrix") { throw("Arguments 'data' should be a 'matrix', not a ", cls) } n <- ncol(data) nc <- length(classes) if (n != nc) { throw("Number of samples in 'data' (", n, ") should match number of samples in 'classes' (", nc, ")") } rm(nc) ## Argument 'graph' if (!validGraph(graph)) { throw("Argument 'graph' is not a valid graph object") } ## Check that there is only one connex component cc <- connectedComp(graph) if (length(cc)>1) { throw("More than one connex component in graph") } rm(cc) ## Argument 'verbose': verbose <- Arguments$getVerbose(verbose) if (verbose) { cat <- R.utils::cat pushState(verbose) on.exit(popState(verbose)) } ## check consistency between gene and node names verbose && enter(verbose, "Keeping genes in the graph *and* the expression data set") dataGN <- rownames(data) graphGN <- nodes(graph) mm <- match(graphGN, dataGN) ww <- which(is.na(mm)) ll <- length(ww) if (ll) { throw(ll, " genes of the graph were not found in the expression data set: ") } ## sorting (and subsetting if necessary) data <- data[mm, ] ## sanity check (ordering should be the same now) dataGN <- rownames(data) stopifnot(all.equal(dataGN, graphGN)) rm(graphGN) rm(dataGN) verbose && exit(verbose) p <- numNodes(graph) geneNames <- rownames(data) cls <- sort(unique(classes)) X1 <- t(data[, classes==cls[1]]) X2 <- t(data[, classes==cls[2]]) ## get sign information (if any) and infer from its presence the type of graph #signMat <- attr(graph, 'signMat') signMat <- graph@graphData$signMat if (is.null(signMat)) { verbose && cat(verbose, "Unsigned graph") ## use adjacency matrix of the corresponding undirected graph ##ugraph <- ugraph(graph) ##graphAM <- as(ugraph, "graphAM") ##adjMat <- graphAM@adjMat adjMat <- as(graph,"graphAM")@adjMat ## sanity check: symmetry ##stopifnot(sum(sum(t(adjMat)!=adjMat))==0) mat <- adjMat } else { verbose && cat(verbose, "Signed graph") mat <- signMat } ##print(mat) ## Argument 'prop' if (is.null(prop)) { prop <- c(0.05, 0.1, 0.2, 0.4) ndim <- unique(c(1:5, ceiling(p*prop))) ndim <- sort(ndim[ndim<=p]) } else { prop <- Arguments$getNumerics(prop, range=c(0, 1)) ndim <- unique(ceiling(p*prop)) } dg <- rowSums(abs(mat)) verbose && cat(verbose, "Degrees") verbose && str(verbose, dg) ## TODO: take multiplicity of eigen value into account using parameter 'k' lfA <- laplacianFromA(mat, ..., ltype="meanInfluence", k=1) U <- lfA$U res <- NULL resNames <- NULL ## T2 in original space verbose && enter(verbose, "Testing") verbose && cat(verbose, "Raw T2 (Hotelling)") T2H <- try(T2.test(X1, X2)) if (class(T2H)=="try-error") { pH <- NA } else { pH <- T2H$p.value verbose & str(verbose, pH) } res <- c(res, pH) resNames <- c(resNames, "T2") ## T2 in Fourier space verbose && cat(verbose, "T2 on Fourier components") pU <- rep(NA, length=length(ndim)) names(pU) <- ndim for (kk in seq(along=ndim)) { kR <- ndim[kk] T2U <- graph.T2.test(X1, X2, G=NULL, lfA=lfA, k=kR) ##T2U <- T2.test(X1%*%U[, 1:kR], X2%*%U[, 1:kR]) pU[kk] <- T2U$p.value } verbose & str(verbose, pU) res <- c(res, pU) resNames <- c(resNames, paste("T2 (", ndim, " Fourier components)", sep="")) ## T2 with PCA, other test statistics if (FALSE) { verbose && cat(verbose, "T2 on PCA components") edX <- svd(rbind(X1, X2)) E <- edX$v pPCA <- rep(NA, length=length(ndim)) names(pPCA) <- ndim for (kk in seq(along=ndim)) { kR <- ndim[kk] T2PCA <- T2.test(X1%*%E[, 1:kR], X2%*%E[, 1:kR]) pPCA[kk] <- T2PCA$p.value } res <- c(res, pPCA) resNames <- c(resames, paste("T2 (", ndim, " PCA components)", sep="")) verbose & str(verbose, pPCA) verbose && cat(verbose, "Adaptive Neyman (Fan)") T2AN <- AN.test(X1%*%U, X2%*%U, p) pAN <- T2AN$p.value verbose & str(verbose, pAN) res <- c(res, pAN) resNames <- c(resames, "Adaptive Neyman") } ## if (FALSE) verbose && exit(verbose) names(res) <- resNames ret <- list(p.value=res, graph=graph, k=ndim) ret } ############################################################################ ## HISTORY: ## 2010-10-08 ## o Now validating argument 'verbose'. ## 2010-10-01 ## o Now manipulates A instead of t(A) (transposition made within ## laplacianFromA). ## 2010-08-05 ## o CLEAN-UP: Now uses 'laplacianFromA'. ## 2010-07-15 ## o ROBUSTIFICATION: Now use 'cls <- sort(unique(classes))' to make 'cls' ## independent of the input order of the label vector. ## 2010-05 ## o Created. ############################################################################