subpathwayTypes <- function(grouping='all') { subTypes <- .subpathwayTypes() supportedMethods <- c( subTypes[['subStreamCases']], subTypes[['neighborCases']], subTypes[['linearCases']], subTypes[['communityCases']], subTypes[['componentCases']]) if ( is.null(grouping) || grouping == '' ) { return(NULL) } if ( length(grouping) == 1 && grepl('^all', grouping) ) { subType <- supportedMethods type <- gsub('all.', '', grouping) intMeasures <- c('bwd', 'fwd', 'stream', 'neighbourhood', 'cascade', 'community', 'component', 'topological', 'functional', 'DEG') extMeasures <- .getExternalMeasures() if ( type %in% c(intMeasures, extMeasures) ) { supportedMethods <- subType[grepl(type, subType)] } } if ( length(grouping) == 1 && !grepl('^all', grouping) ) { supportedMethods <- grouping } return( supportedMethods ) } .subpathwayTypes <- function() { stream.choices <- c('fwd', 'bwd') commun.choices <- c('walktrap', 'edge_betweenness', 'fast_greedy', 'leading_eigen', 'infomap', 'louvain' ) compon.choices <- c('decompose', 'max_cliques', 'cliques', 'coreness' ) topological.choices <- c( 'topological.degree', 'topological.betweenness', 'topological.closeness', 'topological.hub_score', 'topological.eccentricity', 'topological.page_rank', 'topological.start_nodes') functional.choices <- paste0('functional.', .getFunctionalMeasures()) source.choices <- c(topological.choices, functional.choices) cases <- expand.grid( stream.choices, 'neighbourhood', source.choices) neighborCases <- apply(cases, 1, function(x) {paste0(x, collapse='.')}) cases <- expand.grid( stream.choices, 'stream', source.choices) subStreamCases <- apply(cases, 1, function(x) {paste0(x, collapse='.')}) cases <- expand.grid( stream.choices, 'cascade', source.choices) linearCases <- apply(cases, 1, function(x) {paste0(x, collapse='.')}) cases <- expand.grid( 'community', commun.choices) communityCases <- apply(cases, 1, function(x) {paste0(x, collapse='.')}) cases <- expand.grid( 'component', compon.choices[1:2]) compCases_a <- apply(cases, 1, function(x) {paste0(x, collapse='.')}) cases <- expand.grid( 'component.', paste0(3:9, '-', compon.choices[3]) ) compCases_b <- apply(cases, 1, function(x) {paste0(x, collapse='')}) cases <- expand.grid( 'component.', paste0(3:9, '-', compon.choices[4]) ) compCases_c <- apply(cases, 1, function(x) {paste0(x, collapse='')}) componentCases <- c(compCases_a, compCases_b, compCases_c) return(list('neighborCases'=neighborCases, 'subStreamCases'=subStreamCases, 'linearCases'=linearCases, 'communityCases'=communityCases, 'componentCases'=componentCases)) } .subpathwayAnalysis <- function( edgeList, method=c(), DEgenes, a=3, b=10, org, verbose=TRUE ) { # a: Number of neighbors # b: Number of top-nodes supportedMethods <- subpathwayTypes() unsupportedOptions <- method[!method %in% supportedMethods] if ( length(unsupportedOptions) > 0 ) { message('Option(s) ', unsupportedOptions, ' not supported.') out <- list(NULL) names(out) <- paste0('subAnalysis.', method) return( out ) } if (verbose) { message('Performing subpathway analysis ( ', method , ' )...', appendLF = FALSE) } # Create an igraph object from an edgelist gi <- graph_from_edgelist(as.matrix(edgeList[, 1:2])) # Change direction of substream if necessary if ( gsub('bwd.', '', method) != method ) { edgeList <- get.edgelist(gi) gi <- graph_from_edgelist(cbind(edgeList[, 2], edgeList[, 1])) } if ( nrow(edgeList) == 0 ) { if (verbose) { message('done.') } return (NULL) } subTypes <- .subpathwayTypes() if ( method %in% subTypes[['neighborCases']] ) { sourceMeasure <- gsub('fwd.neighbourhood.', '', method) sourceMeasure <- gsub('bwd.neighbourhood.', '', sourceMeasure) nodes <- .measureToNodes(graph=gi, measure=sourceMeasure, org=org, DEgenes=DEgenes ) sourceNodes <- nodes[1 : min(b, length(nodes))] if (length(nodes) == 0) { out <- list(NULL) names(out) <- paste0('subAnalysis.', method) if (verbose) { message('done.') } return(out) } R <- vector(mode='list', length=length(sourceNodes)) for (i in seq_len(length(sourceNodes)) ) { R[[i]] <- names(unlist(ego(graph=gi, order=a, nodes=sourceNodes[i]))) } names(R) <- paste0('sub', seq_len(length(R))) out <- list(R) names(out) <- paste0('subAnalysis.', method) } if ( method %in% subTypes[['subStreamCases']] ) { sourceMeasure <- gsub('fwd.stream.', '', method) sourceMeasure <- gsub('bwd.stream.', '', sourceMeasure) nodes <- .measureToNodes(graph=gi, measure=sourceMeasure, org=org, DEgenes=DEgenes ) sourceNodes <- nodes[1 : min(b, length(nodes))] if (length(nodes) == 0) { out <- list(NULL) names(out) <- paste0('subAnalysis.', method) if (verbose) { message('done.') } return(out) } R <- vector(mode='list', length=length(sourceNodes)) for (i in seq_len(length(sourceNodes)) ) { res <- names(dfs(graph=gi, root=sourceNodes[i], unreachable=FALSE)[['order']]) R[[i]] <- res[which(!is.na(res))] } names(R) <- sourceNodes # Keep subpathways with more than two members R <- R[which( lapply(R, function(x) { length(x) }) >= 3 )] if ( length(R) > 0 ) { names(R) <- paste0('sub', seq_len(length(R)) ) } out <- list(R) names(out) <- paste0('subAnalysis.', method) } if ( method %in% subTypes[['linearCases']] ) { sourceMeasure <- gsub('fwd.cascade.', '', method) sourceMeasure <- gsub('bwd.cascade.', '', sourceMeasure) # Find all source nodes nodes <- .measureToNodes(graph=gi, measure=sourceMeasure, org=org, DEgenes=DEgenes ) sourceNodes <- nodes[1 : min(b, length(nodes))] if (length(nodes) == 0) { out <- list(NULL) names(out) <- paste0('subAnalysis.', method) if (verbose) { message('done.') } return(out) } # Simplify graph adjmat <- get.adjacency(gi) gi <- graph.adjacency(triu(adjmat)) # Find all destination nodes destinNodes <- names(sort(which(degree(gi, mode='out', loops=FALSE) == 0), decreasing=TRUE)) sourceNodes <- sourceNodes[!sourceNodes %in% destinNodes] destinNodes <- destinNodes[!destinNodes %in% sourceNodes] # Find all linear subpathways between each pair of start/end nodes N <- length(sourceNodes)*length(destinNodes) lpaths <- vector(mode='list', length=N) lnames <- vector(mode='numeric', length=N) cnt <- 1 lpaths <- NULL if ( N > 0 ) { for ( sourceNode in sourceNodes ) { for ( destinNode in destinNodes ) { lpaths[[cnt]] <- all_simple_paths( gi, from = sourceNode, to = destinNode, mode="out") lnames[cnt] <- paste(sourceNode, destinNode, sep='-') cnt <- cnt + 1 } } combinations <- expand.grid( sourceNodes, destinNodes ) names(lpaths) <- lnames names(lpaths) <- paste0('sub', seq_len(length(lpaths)) ) # Remove NULL results and flatten first level of results lpaths <- do.call(c, lpaths) # Keep subpathways with more than two members lpaths <- lpaths[which(lapply(lpaths, length) > 2)] # Keep genes ids lpaths <- lapply(lpaths, function(x) { names(x) } ) } out <- list(lpaths) names(out) <- paste0('subAnalysis.', method) } if ( method %in% subTypes[['communityCases']] ) { communityHandlers <- c( 'walktrap'=function(x) { cluster_walktrap(x) }, 'edge_betweenness'=function(x) { cluster_edge_betweenness(x) }, 'fast_greedy'=function(x) { cluster_fast_greedy(as.undirected(x)) }, 'leading_eigen'=function(x) { cluster_leading_eigen(as.undirected(x)) }, 'infomap'=function(x) { cluster_infomap(as.undirected(x)) }, 'louvain'=function(x) { cluster_louvain(as.undirected(x)) }) gr <- communityHandlers[[gsub('community.', '', method)]](gi) R <- vector( mode='list', length=length(gr) ) if ( length(gr) > 0 ) { for ( i in seq_len(length(gr)) ) { R[[i]] <- (gr[[i]]) } } if ( length(R) > 0 ) { names(R) <- paste0('sub', seq_len(length(R)) ) } out <- list(R) names(out) <- paste0('subAnalysis.', method) } if ( method %in% subTypes[['componentCases']] ) { if ( method == 'component.max_cliques' ) { gr <- max_cliques(as.undirected(gi), a, b) R <- vector( mode='list', length=length(gr) ) if ( length(gr) > 0 ) { for ( i in seq_len(length(gr)) ) { R[[i]] <- names(unlist(gr[i])) } } } if ( method == 'component.decompose') { gr <- decompose(gi) R <- lapply(gr, function(x) { as_data_frame(x, what="edges") } ) } if ( gsub('-cliques', '', method) != method ) { p <- gsub('-cliques', '', method) k <- as.numeric(gsub('component.', '', p)) # g <- as_graphnel( gi ) g <- igraph.to.graphNEL( gi ) ksubs <- kCliques(ugraph(g))[paste0(k, '-cliques')][[1]] ksubs <- lapply(ksubs, function(x) { matrix(x, nrow=1) } ) # Extract valid pairs edgeList <- as_data_frame(gi, what="edges") # Consider only actual interactions between genes. if ( length(ksubs) > 0 ) { ksubs <- .unlistToMatrix(.fillMatrixList(ksubs)) R <- vector(mode='list', length=nrow(ksubs)) for (j in seq_len(nrow(ksubs)) ) { # Edge list indexes r1 <- as.numeric(is.element( edgeList[,1], ksubs[j, ]) ) r2 <- as.numeric(is.element( edgeList[,2], ksubs[j, ]) ) idx <- which((r1 + r2) == 2) if ( length(idx) > 0 ) { R[[j]] <- unique(as.vector(t(edgeList[idx, ]))) } } }else{ R <- NULL} } if ( gsub('-coreness', '', method) != method ) { p <- gsub('-coreness', '', method) p <- as.numeric(gsub('component.', '', p)) corDist <- coreness(gi) R <- names(corDist)[which(corDist == p)] } if ( length(R) > 0 ) { names(R) <- paste0('sub', seq_len(length(R)) ) } out <- list(R) names(out) <- paste0('subAnalysis.', method) } if (verbose) { message('done.') } return(out) } .getFunctionalNodes <- function( graph, targets, org ) { # Extract nodes from graph and keep term related ones graphGenes <- names(V(graph)) # Unique target genes in entrez uGenesFromTargets <- unique(as.vector(targets)) uGenesFromTargets <- unname(.changeAnnotation(annData=uGenesFromTargets, org='hsa', choice='HGNCtoEntrez')) uGenesFromTargets <- uGenesFromTargets[!is.na(uGenesFromTargets)] # Keep targets intersecting with graph genes targetGenesInGraph <- graphGenes[graphGenes %in% uGenesFromTargets] targetGenesInGraph <- unname(.changeAnnotation(annData=targetGenesInGraph, org='hsa', choice='entrezToHGNC')) # Filter target genes with graph genes idx <- matrix(targets %in% targetGenesInGraph, nrow=nrow(targets))*1 targets[!idx] <- NA nodes <- names(sort(table(targets), decreasing=TRUE)) # Change to entrez gene annotation nodes <- unname(.changeAnnotation(annData=nodes, org=org, choice='HGNCtoEntrez')) return (nodes) } .measureToNodes <- function ( graph, measure, org, DEgenes=NULL ) { topologicalHandlers <- .getTopologicalHandlers() if ( grepl('topological', measure) ) { func <- topologicalHandlers[[gsub('topological.', '', measure)]] nodes <- names(sort(func(graph), decreasing=TRUE)) } if ( measure == 'functional.DEG' ) { nodes <- names(sort(DEgenes, decreasing=FALSE)) } if ( measure %in% paste0('functional.', .getExternalMeasures() ) ) { if ( org != 'hsa' ) { out <- list(NULL) names(out) <- paste0('subAnalysis.', measure) return( out ) } # Find genes with most occurences in the selected term measure <- gsub('functional.', '', measure) targets <- .loadTermData( type=measure ) if ( is.null(targets) ) { return(NULL) } nodes <- .getFunctionalNodes(graph=graph, targets=targets, org=org) } return( nodes ) } .getTopologicalHandlers <- function() { topologicalHandlers <- c( 'degree'=function(x) { degree(x) }, 'betweenness'=function(x) { betweenness(x) }, 'closeness'=function(x) { closeness(x) }, 'hub_score'=function(x) { hub_score(x)[['vector']] }, 'eccentricity'=function(x) { eccentricity(x) }, 'page_rank'=function(x) { page_rank(x)[['vector']] }, 'start_nodes'=function(x) { which(degree(x, mode='in', loops=FALSE) == 0) } ) return( topologicalHandlers ) } # # Data-related functions # .changeAnnotation <- function(annData, org, choice) { if ( org == 'hsa' ) { dir <- system.file('extdata//Data', package='DEsubs') load(paste(dir, 'libraryEntrezToHGNC.RData', sep='//'), e <- new.env()) libraryEntrezToHGNC <- e[['libraryEntrezToHGNC']] if ( choice == 'entrezToHGNC' ) { annData <- libraryEntrezToHGNC[annData] } if ( choice == 'HGNCtoEntrez' ) { libraryHGNCtoEntrez <- names(libraryEntrezToHGNC) names(libraryHGNCtoEntrez) <- libraryEntrezToHGNC annData <- libraryHGNCtoEntrez[annData] } } return(annData) } .getExternalMeasures <- function() { defaultReferences <- .getDefaultReferences() # The default datasets for external measures are stored in the Data # folder within the package directory. The user however can include # any number of gene-sets within cache[['datDir']] directory. # The availiable external measures will be the union of gene-sets in # both directories. files <- list.files(system.file('extdata//Data', package='DEsubs')) otherFiles <- c('libraryEntrezToHGNC.RData', 'edgeLists.RData', 'libraryEntrezToExternalNomenclature.RData') files <- files[-which(files %in% otherFiles)] # Find if any of the default files have been deleted idx <- which(!paste0(defaultReferences, '.RData') %in% files) if ( length(idx) > 0 ) { message( 'References ', paste0(files[idx], collapse=', '), ' missing.') } supportedReferences <- gsub('.RData', '', files) # Search for new gene sets userReferences <- gsub('.RData', '', list.files(cache[['datDir']])) supportedReferences <- unique(c(supportedReferences, userReferences)) return( supportedReferences ) } .getFunctionalMeasures <- function() { return( c('DEG', .getExternalMeasures()) ) } .getDefaultReferences <- function() { # Default external references stored within the package library defaultReferences <- c( 'KEGG', 'GO_bp', 'GO_cc', 'GO_mf', 'Disease_OMIM', 'Disease_GAD', 'Drug_DrugBank', 'miRNA', 'TF') return( defaultReferences ) } # # Base data # .loadTermData <- function( type ) { references <- .getExternalMeasures() defaultReferences <- .getDefaultReferences() userReferences <- references[!references %in% defaultReferences ] if ( type %in% userReferences ) { dir <- cache[['datDir']] } if ( type %in% defaultReferences ) { dir <- system.file('extdata//Data', package='DEsubs') } if ( type %in% references ) { iFile <- paste0( dir, '//' ,type, '.RData' ) load(iFile, e <- new.env()) targetsPerClass <- e[['targetsPerClass']] } else{ message('Type ', type, ' not supported.') } if ( is.null(targetsPerClass) ) { message('Set ', type, ' does not contain valid targers.') } return( targetsPerClass ) }