git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/DEsubs@122029 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
1 |
+Package: DEsubs |
|
2 |
+Version: 0.99.6 |
|
3 |
+Date: 2015-07-19 |
|
4 |
+Title: DEsubs: an R package for flexible identification of |
|
5 |
+ differentially expressed subpathways using RNA-seq expression |
|
6 |
+ experiments |
|
7 |
+Author: Aristidis G. Vrahatis and Panos Balomenos |
|
8 |
+Maintainer: Aristidis G. Vrahatis <agvrahatis@upatras.gr> and Panos |
|
9 |
+ Balomenos <balomenos@upatras.gr> |
|
10 |
+Description: DEsubs is a network-based systems biology package that |
|
11 |
+ extracts disease-perturbed subpathways within a pathway network |
|
12 |
+ as recorded by RNA-seq experiments. It contains an extensive |
|
13 |
+ and customizable framework covering a broad range of operation |
|
14 |
+ modes at all stages of the subpathway analysis, enabling a |
|
15 |
+ case-specific approach. The operation modes refer to the |
|
16 |
+ pathway network construction and processing, the subpathway |
|
17 |
+ extraction, visualization and enrichment analysis with regard |
|
18 |
+ to various biological and pharmacological features. Its |
|
19 |
+ capabilities render it a tool-guide for both the modeler and |
|
20 |
+ experimentalist for the identification of more robust |
|
21 |
+ systems-level biomarkers for complex diseases. |
|
22 |
+Depends: R (>= 3.3), locfit |
|
23 |
+SystemRequirements: |
|
24 |
+License: GPL-3 |
|
25 |
+Repository: Bioconductor |
|
26 |
+Date/Publication: |
|
27 |
+NeedsCompilation: no |
|
28 |
+LazyLoad: yes |
|
29 |
+Imports: graph, igraph, RBGL, circlize, limma, edgeR, samr, EBSeq, |
|
30 |
+ NBPSeq, DESeq, stats, grDevices, graphics, pheatmap, utils, |
|
31 |
+ ggplot2, Matrix, jsonlite, tools, DESeq2, methods |
|
32 |
+Suggests: RUnit, BiocGenerics, knitr |
|
33 |
+VignetteBuilder: knitr |
|
34 |
+biocViews: SystemsBiology, GraphAndNetwork, Pathways, KEGG, |
|
35 |
+ GeneExpression, NetworkEnrichment, Network, RNASeq, |
|
36 |
+ DifferentialExpression, Normalization |
0 | 37 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,144 @@ |
1 |
+import( 'locfit' ) |
|
2 |
+importFrom( 'DESeq', |
|
3 |
+ 'newCountDataSet', |
|
4 |
+ 'estimateSizeFactors', |
|
5 |
+ 'estimateDispersions', |
|
6 |
+ 'nbinomTest', |
|
7 |
+ 'getVarianceStabilizedData') |
|
8 |
+importFrom( 'DESeq2', |
|
9 |
+ 'DESeqDataSetFromMatrix', |
|
10 |
+ 'estimateSizeFactors', |
|
11 |
+ 'estimateDispersions', |
|
12 |
+ 'nbinomWaldTest', |
|
13 |
+ 'results') |
|
14 |
+importFrom( 'samr', |
|
15 |
+ 'SAMseq' ) |
|
16 |
+importFrom( 'limma', |
|
17 |
+ 'voom', |
|
18 |
+ 'lmFit', |
|
19 |
+ 'eBayes' ) |
|
20 |
+importFrom( 'EBSeq', |
|
21 |
+ 'MedianNorm', |
|
22 |
+ 'EBTest', |
|
23 |
+ 'GetPPMat') |
|
24 |
+importFrom( 'NBPSeq', |
|
25 |
+ 'nbp.test') |
|
26 |
+importFrom( 'edgeR', |
|
27 |
+ 'DGEList', |
|
28 |
+ 'exactTest', |
|
29 |
+ 'calcNormFactors', |
|
30 |
+ 'estimateCommonDisp', |
|
31 |
+ 'estimateTagwiseDisp') |
|
32 |
+importFrom( 'graph', |
|
33 |
+ 'ugraph') |
|
34 |
+importFrom( 'RBGL', |
|
35 |
+ 'kCliques') |
|
36 |
+importFrom( 'igraph', |
|
37 |
+ 'as_graphnel', |
|
38 |
+ 'graph_from_edgelist', |
|
39 |
+ 'as_data_frame', |
|
40 |
+ 'closeness', |
|
41 |
+ 'hub_score', |
|
42 |
+ 'decompose', |
|
43 |
+ 'cluster_walktrap', |
|
44 |
+ 'cluster_edge_betweenness', |
|
45 |
+ 'cluster_fast_greedy', |
|
46 |
+ 'cluster_leading_eigen', |
|
47 |
+ 'cluster_infomap', |
|
48 |
+ 'cluster_louvain', |
|
49 |
+ 'cliques', |
|
50 |
+ 'max_cliques', |
|
51 |
+ 'coreness', |
|
52 |
+ 'components', |
|
53 |
+ 'as.undirected', |
|
54 |
+ 'betweenness', |
|
55 |
+ 'eccentricity', |
|
56 |
+ 'articulation_points', |
|
57 |
+ 'page_rank', |
|
58 |
+ 'degree', |
|
59 |
+ 'dfs', |
|
60 |
+ 'ego', |
|
61 |
+ 'get.edgelist', |
|
62 |
+ 'get.adjacency', |
|
63 |
+ 'graph.adjacency', |
|
64 |
+ 'graph.adjlist', |
|
65 |
+ 'V', |
|
66 |
+ 'E', |
|
67 |
+ 'all_simple_paths', |
|
68 |
+ 'graph.data.frame', |
|
69 |
+ 'simplify', |
|
70 |
+ 'V<-', |
|
71 |
+ 'E<-', |
|
72 |
+ 'layout.davidson.harel', |
|
73 |
+ 'write.graph') |
|
74 |
+importFrom( 'Matrix', |
|
75 |
+ 'triu') |
|
76 |
+importFrom( 'circlize', |
|
77 |
+ 'circos.par', |
|
78 |
+ 'chordDiagram', |
|
79 |
+ 'get.all.sector.index', |
|
80 |
+ 'get.cell.meta.data', |
|
81 |
+ 'circos.text', |
|
82 |
+ 'circos.clear') |
|
83 |
+importFrom( 'grDevices', |
|
84 |
+ 'pdf', |
|
85 |
+ 'png', |
|
86 |
+ 'svg', |
|
87 |
+ 'rainbow', |
|
88 |
+ 'dev.off', |
|
89 |
+ 'colorRampPalette', |
|
90 |
+ 'dev.copy2pdf') |
|
91 |
+importFrom( 'graphics', |
|
92 |
+ 'par', |
|
93 |
+ 'barplot', |
|
94 |
+ 'text', |
|
95 |
+ 'plot') |
|
96 |
+importFrom( 'stats', |
|
97 |
+ 'deviance', |
|
98 |
+ 'glm', |
|
99 |
+ 'hatvalues', |
|
100 |
+ 'pchisq', |
|
101 |
+ 'pf', |
|
102 |
+ 'poisson', |
|
103 |
+ 'qchisq', |
|
104 |
+ 'qf', |
|
105 |
+ 'residuals', |
|
106 |
+ 'p.adjust', |
|
107 |
+ 'phyper', |
|
108 |
+ 'model.matrix', |
|
109 |
+ 'cor', |
|
110 |
+ 'predict') |
|
111 |
+importFrom( 'pheatmap', |
|
112 |
+ 'pheatmap') |
|
113 |
+importFrom( 'utils', |
|
114 |
+ 'browseURL', |
|
115 |
+ 'head', |
|
116 |
+ 'write.table', |
|
117 |
+ 'read.table', |
|
118 |
+ 'capture.output') |
|
119 |
+importFrom( 'ggplot2', |
|
120 |
+ 'qplot', |
|
121 |
+ 'theme_classic', |
|
122 |
+ 'geom_point', |
|
123 |
+ 'scale_colour_gradientn', |
|
124 |
+ 'labs', |
|
125 |
+ 'cut_number', |
|
126 |
+ 'theme', |
|
127 |
+ 'element_text') |
|
128 |
+importFrom( 'jsonlite', |
|
129 |
+ 'toJSON') |
|
130 |
+importFrom( 'tools', |
|
131 |
+ 'file_ext') |
|
132 |
+importFrom( 'methods', |
|
133 |
+ 'representation', |
|
134 |
+ 'setClass', |
|
135 |
+ 'setGeneric', |
|
136 |
+ 'setMethod', |
|
137 |
+ 'signature', |
|
138 |
+ 'new') |
|
139 |
+export( 'DEsubs', |
|
140 |
+ 'geneVisualization', |
|
141 |
+ 'organismVisualization', |
|
142 |
+ 'subpathwayToGraph', |
|
143 |
+ 'subpathwayVisualization', |
|
144 |
+ 'subpathwayTypes' ) |
0 | 145 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,718 @@ |
1 |
+# Gene level measures |
|
2 |
+# |
|
3 |
+ |
|
4 |
+geneVisualization <- function ( DEsubs.out, |
|
5 |
+ measures.topological='all', |
|
6 |
+ measures.functional='all', |
|
7 |
+ measures.barplot=TRUE, |
|
8 |
+ topGenes=10, |
|
9 |
+ colors.topological=colorRampPalette(c("white", "red"))(100), |
|
10 |
+ colors.functional=colorRampPalette(c("white", "red"))(100), |
|
11 |
+ colors.barplot='#C7EDFCFF', |
|
12 |
+ size.topological=c(5,4), |
|
13 |
+ size.functional=c(7,4), |
|
14 |
+ size.barplot=c(5,5), |
|
15 |
+ outfile.topological='', |
|
16 |
+ outfile.functional='', |
|
17 |
+ outfile.barplot='', |
|
18 |
+ export='pdf', |
|
19 |
+ verbose=TRUE) |
|
20 |
+{ |
|
21 |
+ if ( verbose ) |
|
22 |
+ { message('Generating gene-level view...', appendLF = FALSE) } |
|
23 |
+ |
|
24 |
+ edgeList <- DEsubs.out[['edgeList']] |
|
25 |
+ DEgenes <- DEsubs.out[['DEgenes']] |
|
26 |
+ org <- DEsubs.out[['org']] |
|
27 |
+ nomen <- DEsubs.out[['mRNAnomenclature']] |
|
28 |
+ |
|
29 |
+ if ( length(DEgenes) < topGenes ) { topGenes <- length(DEgenes) } |
|
30 |
+ |
|
31 |
+ # Keep top DEgenes |
|
32 |
+ topGenes <- sort(DEgenes)[1:topGenes] |
|
33 |
+ output <- list() |
|
34 |
+ |
|
35 |
+ # Topological analysis |
|
36 |
+ supportedTopologicalMeasures <- c( 'degree', |
|
37 |
+ 'betweenness', |
|
38 |
+ 'closeness', |
|
39 |
+ 'hub_score', |
|
40 |
+ 'eccentricity', |
|
41 |
+ 'page_rank') |
|
42 |
+ # Ontological analysis |
|
43 |
+ supportedFunctionalMeasures <- .getExternalMeasures() |
|
44 |
+ |
|
45 |
+ if ( !is.null(measures.topological) ) |
|
46 |
+ { |
|
47 |
+ if ( 'all' %in% measures.topological ) |
|
48 |
+ { measures.topological <- supportedTopologicalMeasures } |
|
49 |
+ |
|
50 |
+ failTopo <- measures.topological[!measures.topological %in% |
|
51 |
+ supportedTopologicalMeasures] |
|
52 |
+ |
|
53 |
+ if ( length(failTopo) > 0 ) |
|
54 |
+ { message('Topological option(s) ', failTopo, ' not supported.') } |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ if ( !is.null(measures.functional) ) |
|
58 |
+ { |
|
59 |
+ if ( 'all' %in% measures.functional ) |
|
60 |
+ { measures.functional <- supportedFunctionalMeasures } |
|
61 |
+ |
|
62 |
+ failFunc <- measures.functional[!measures.functional %in% |
|
63 |
+ supportedFunctionalMeasures] |
|
64 |
+ |
|
65 |
+ if ( length(failFunc) > 0 ) |
|
66 |
+ { message('Ontological option(s) ', failFunc, ' not supported.') } |
|
67 |
+ } |
|
68 |
+ |
|
69 |
+ # Analysis #1 |
|
70 |
+ topology <- .topologicalMeasures(measures=measures.topological, |
|
71 |
+ edgeList=edgeList, |
|
72 |
+ topGenes=topGenes, |
|
73 |
+ org=org, |
|
74 |
+ visualize=TRUE, |
|
75 |
+ colors=colors.topological, |
|
76 |
+ width=size.topological[1], |
|
77 |
+ height=size.topological[2], |
|
78 |
+ outfile=outfile.topological, |
|
79 |
+ export=export) |
|
80 |
+ |
|
81 |
+ # Analysis #2 |
|
82 |
+ ontology <- .ontologicalMeasures(measures=measures.functional, |
|
83 |
+ topGenes=topGenes, |
|
84 |
+ org=org, |
|
85 |
+ visualize=TRUE, |
|
86 |
+ colors=colors.functional, |
|
87 |
+ width=size.functional[1], |
|
88 |
+ height=size.functional[2], |
|
89 |
+ outfile=outfile.functional, |
|
90 |
+ export=export) |
|
91 |
+ # Analysis #3 |
|
92 |
+ if ( measures.barplot ) |
|
93 |
+ { |
|
94 |
+ |
|
95 |
+ names(topGenes) <- .changeAnnotation(annData=names(topGenes), |
|
96 |
+ org='hsa', choice='entrezToHGNC') |
|
97 |
+ |
|
98 |
+ .matrixVisualization( -log10(topGenes), type='barplot', title='', |
|
99 |
+ colors=colors.barplot, |
|
100 |
+ width=size.barplot[1], |
|
101 |
+ height=size.barplot[2], |
|
102 |
+ outfile=outfile.barplot, |
|
103 |
+ export=export) |
|
104 |
+ } |
|
105 |
+ |
|
106 |
+ output <- c(output, list('measures.topological'=topology) ) |
|
107 |
+ output <- c(output, list('measures.functional'=ontology) ) |
|
108 |
+ |
|
109 |
+ if ( verbose ) |
|
110 |
+ { message('done', appendLF = TRUE) } |
|
111 |
+ |
|
112 |
+ return( output ) |
|
113 |
+} |
|
114 |
+ |
|
115 |
+.topologicalMeasures <- function( edgeList, measures, topGenes, org, |
|
116 |
+ visualize, colors, width, height, outfile, export) |
|
117 |
+{ |
|
118 |
+ if ( is.null(measures) ) |
|
119 |
+ { return(NULL) } |
|
120 |
+ |
|
121 |
+ supportedMeasures <- c( 'degree', |
|
122 |
+ 'betweenness', |
|
123 |
+ 'closeness', |
|
124 |
+ 'hub_score', |
|
125 |
+ 'eccentricity', |
|
126 |
+ 'page_rank') |
|
127 |
+ |
|
128 |
+ if ( missing (measures) ) { measures <- supportedMeasures } |
|
129 |
+ |
|
130 |
+ gi <- graph_from_edgelist(as.matrix(edgeList[, 1:2])) |
|
131 |
+ topo <- list() |
|
132 |
+ topologicalHandlers <- .getTopologicalHandlers() |
|
133 |
+ for ( i in seq_len(length(measures)) ) |
|
134 |
+ { |
|
135 |
+ res <- sort(topologicalHandlers[[measures[i]]](gi), decreasing=TRUE) |
|
136 |
+ topo <- c(topo, list(res)) |
|
137 |
+ } |
|
138 |
+ names(topo) <- measures |
|
139 |
+ |
|
140 |
+ uGenes <- unique(as.vector(as.matrix(edgeList[, 1:2]))) |
|
141 |
+ ranking <- matrix(, nrow=length(uGenes), ncol=length(topo)) |
|
142 |
+ for ( i in seq_len(length(topo)) ) |
|
143 |
+ { |
|
144 |
+ ranking[, i] <- topo[[i]][as.character(uGenes)] |
|
145 |
+ ranking[, i] <- floor(ranking[, i]/max(ranking[, i])*100)/100 |
|
146 |
+ } |
|
147 |
+ rownames(ranking) <- uGenes |
|
148 |
+ colnames(ranking) <- names(topo) |
|
149 |
+ |
|
150 |
+ |
|
151 |
+ # Return info about top genes |
|
152 |
+ if ( !missing(topGenes) && !is.null(topGenes) ) |
|
153 |
+ { |
|
154 |
+ idx <- which( rownames(ranking) %in% names(topGenes) ) |
|
155 |
+ ranking <- ranking[idx, , drop=FALSE] |
|
156 |
+ } |
|
157 |
+ |
|
158 |
+ |
|
159 |
+ if ( visualize ) |
|
160 |
+ { |
|
161 |
+ rownames(ranking) <- .changeAnnotation( annData=rownames(ranking), |
|
162 |
+ org='hsa', |
|
163 |
+ choice='entrezToHGNC') |
|
164 |
+ |
|
165 |
+ .matrixVisualization( dataVis=as.matrix(ranking), |
|
166 |
+ type='heatmap', title='topological', |
|
167 |
+ colors=colors, |
|
168 |
+ width=width, |
|
169 |
+ height=height, |
|
170 |
+ outfile=outfile, |
|
171 |
+ export=export ) |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ return(ranking) |
|
175 |
+} |
|
176 |
+ |
|
177 |
+.ontologicalMeasures <- function( measures, topGenes, org, visualize, colors, |
|
178 |
+ width, height, outfile, export ) |
|
179 |
+{ |
|
180 |
+ if ( org != 'hsa' ) |
|
181 |
+ { return(NULL) } # Homo sapiens only |
|
182 |
+ if ( is.null(measures) ) |
|
183 |
+ { return(NULL) } |
|
184 |
+ |
|
185 |
+ supportedTerms <- .getExternalMeasures() |
|
186 |
+ |
|
187 |
+ if ( missing (measures) ) { measures <- supportedTerms } |
|
188 |
+ |
|
189 |
+ # Change gene annotation to HGNC for hsa |
|
190 |
+ names(topGenes) <- .changeAnnotation(annData=names(topGenes), org='hsa', |
|
191 |
+ choice='entrezToHGNC') |
|
192 |
+ |
|
193 |
+ ranking <- matrix(, nrow=length(topGenes), ncol=length(measures)) |
|
194 |
+ invalidIdx <- c() |
|
195 |
+ for ( j in seq_len(length(measures)) ) |
|
196 |
+ { |
|
197 |
+ targets <- .loadTermData( type=measures[j] ) |
|
198 |
+ if ( is.null(targets) ) |
|
199 |
+ { |
|
200 |
+ invalidIdx <- c(invalidIdx, j) |
|
201 |
+ next() |
|
202 |
+ } |
|
203 |
+ for ( i in seq_len(length(topGenes)) ) |
|
204 |
+ { |
|
205 |
+ idx <- targets %in% names(topGenes[i]) |
|
206 |
+ idx <- matrix( idx, nrow=nrow(targets) )*1 |
|
207 |
+ ranking[i, j] <- length(which(rowSums( idx ) > 0)) |
|
208 |
+ } |
|
209 |
+ if ( max(ranking[, j]) > 0 ) |
|
210 |
+ { |
|
211 |
+ ranking[, j] <- floor(ranking[, j]/max(ranking[, j])*100)/100 |
|
212 |
+ } |
|
213 |
+ } |
|
214 |
+ rownames(ranking) <- names(topGenes) |
|
215 |
+ colnames(ranking) <- measures |
|
216 |
+ if ( length(invalidIdx) > 0 ) { ranking <- ranking[, -invalidIdx]} |
|
217 |
+ |
|
218 |
+ if ( visualize ) |
|
219 |
+ { |
|
220 |
+ .matrixVisualization( dataVis=(as.matrix(ranking) ), |
|
221 |
+ type='heatmap', title='external.references', |
|
222 |
+ colors=colors, |
|
223 |
+ width=width, |
|
224 |
+ height=height, |
|
225 |
+ outfile=outfile, |
|
226 |
+ export=export ) |
|
227 |
+ } |
|
228 |
+ |
|
229 |
+ return( ranking ) |
|
230 |
+} |
|
231 |
+ |
|
232 |
+ |
|
233 |
+# |
|
234 |
+# Subpathway level measures |
|
235 |
+# |
|
236 |
+ |
|
237 |
+.subLevelMeasures <- function( subpathway, type, topTerms=20 ) |
|
238 |
+{ |
|
239 |
+ supportedTerms <- .getExternalMeasures() |
|
240 |
+ |
|
241 |
+ if ( missing(type) ) |
|
242 |
+ { |
|
243 |
+ type <- supportedTerms |
|
244 |
+ } |
|
245 |
+ |
|
246 |
+ failType <- type[!type %in% supportedTerms] |
|
247 |
+ if ( length(failType) > 0 ) |
|
248 |
+ { |
|
249 |
+ message('Option(s) ', failType, ' not supported.') |
|
250 |
+ } |
|
251 |
+ |
|
252 |
+ # Access local Rdata diles |
|
253 |
+ targets <- .loadTermData( type ) |
|
254 |
+ if (is.null(targets)) { return(NULL) } |
|
255 |
+ |
|
256 |
+ # Calculate p-values for each class |
|
257 |
+ N <- nrow(targets) |
|
258 |
+ pValues <- vector( mode='numeric', length=N ) |
|
259 |
+ uTargets <- unique( as.vector( targets ) ) |
|
260 |
+ res <- matrix(, nrow=N, ncol=5) |
|
261 |
+ for ( i in 1:N ) |
|
262 |
+ { |
|
263 |
+ termTargetsAll <- which(targets[i,] != '0') |
|
264 |
+ termTargetsOfSub <- which(!is.na(match(targets[i,], |
|
265 |
+ subpathway))) |
|
266 |
+ # Hypergeometric test compares sample to background |
|
267 |
+ # Success.sample, Success.bgd, Failure.bgd, Sample.size |
|
268 |
+ A <- length( termTargetsOfSub ) |
|
269 |
+ B <- length( uTargets ) |
|
270 |
+ C <- length( termTargetsAll ) |
|
271 |
+ D <- length( sub ) |
|
272 |
+ pValues[i] <- 1 - phyper(A, C, B-C, D) |
|
273 |
+ pValues[i] <- floor(pValues[i] * 10000)/1000 |
|
274 |
+ } |
|
275 |
+ names(pValues) <- rownames(targets) |
|
276 |
+ |
|
277 |
+ |
|
278 |
+ # Keep significant terms |
|
279 |
+ idx <- which(pValues < 0.05) |
|
280 |
+ if ( length(idx) == 0 ) |
|
281 |
+ { |
|
282 |
+ return(NULL) |
|
283 |
+ } |
|
284 |
+ pValues <- pValues[idx] |
|
285 |
+ targets <- targets[idx, , drop=FALSE] |
|
286 |
+ # Keep top terms |
|
287 |
+ idx <- order(pValues) |
|
288 |
+ pValues <- pValues[idx] |
|
289 |
+ targets <- targets[idx, , drop=FALSE] |
|
290 |
+ # Choose most significant classes |
|
291 |
+ if ( length(targets) == 0 ) { return(NULL) } |
|
292 |
+ if ( length(pValues) < topTerms ) |
|
293 |
+ { topTerms <- length(pValues) } |
|
294 |
+ pValues <- pValues[1:topTerms] |
|
295 |
+ targets <- targets[1:topTerms, , drop=FALSE] |
|
296 |
+ |
|
297 |
+ # Reduce target matrix with subpathway genes by removing rows and |
|
298 |
+ # and columns not containing any subpathway genes |
|
299 |
+ idx <- targets %in% subpathway |
|
300 |
+ idx <- matrix( idx, nrow=nrow(targets) )*1 |
|
301 |
+ targets[!idx] <- '0' |
|
302 |
+ |
|
303 |
+ # Remove rows with no subpathway genes |
|
304 |
+ idx <- targets == '0' |
|
305 |
+ if ( length(idx) > 0 ) |
|
306 |
+ { |
|
307 |
+ idx <- rowSums(idx) != ncol(targets) |
|
308 |
+ targets <- targets[idx, , drop=FALSE] |
|
309 |
+ } |
|
310 |
+ # Remove columns with no subpathway genes |
|
311 |
+ idx <- targets == '0' |
|
312 |
+ if ( length(idx) > 0 ) |
|
313 |
+ { |
|
314 |
+ idx <- colSums(idx) != nrow(targets) |
|
315 |
+ targets <- targets[, idx, drop=FALSE] |
|
316 |
+ } |
|
317 |
+ # Check if any data remains |
|
318 |
+ if ( length(targets) == 0 ) |
|
319 |
+ { return(NULL) } |
|
320 |
+ |
|
321 |
+ # Create edgelist for topmost results (class, subpathway genes) |
|
322 |
+ edgeList <- matrix(, nrow=length(targets), ncol=3) |
|
323 |
+ for ( i in seq_len(nrow(targets)) ) |
|
324 |
+ { |
|
325 |
+ start <- 1 + (i-1)*ncol(targets) |
|
326 |
+ end <- i*ncol(targets) |
|
327 |
+ genesPerTerm <- expand.grid(rownames(targets)[i], targets[i, ] ) |
|
328 |
+ edgeList[start:end, 1:2] <- as.matrix(genesPerTerm) |
|
329 |
+ edgeList[start:end, 3] <- pValues[rownames(targets)[i]] |
|
330 |
+ } |
|
331 |
+ idx <- which(edgeList[, 2] != '0') |
|
332 |
+ edgeList <- edgeList[idx, , drop=FALSE] |
|
333 |
+ |
|
334 |
+ return ( edgeList ) |
|
335 |
+} |
|
336 |
+ |
|
337 |
+ |
|
338 |
+subpathwayVisualization <- function( DEsubs.out, |
|
339 |
+ references='', |
|
340 |
+ submethod, |
|
341 |
+ subname, |
|
342 |
+ colors=c('#FF0000FF', '#FF9900FF', '#CCFF00FF', |
|
343 |
+ '#33FF00FF', '#00FF66FF', '#0066FFFF', |
|
344 |
+ '#3300FFFF', '#CC00FFFF', '#FF0099FF', |
|
345 |
+ '#EE82EEFF'), |
|
346 |
+ scale=1, |
|
347 |
+ shuffleColors=FALSE, |
|
348 |
+ outfiles='', |
|
349 |
+ export='pdf', |
|
350 |
+ verbose=TRUE ) |
|
351 |
+{ |
|
352 |
+ if ( 'all' %in% references ) |
|
353 |
+ { |
|
354 |
+ references <- .getExternalMeasures() |
|
355 |
+ references <- c(references, 'GO', 'Disease', 'Regulator') |
|
356 |
+ } |
|
357 |
+ if ( length(scale) == 1 ) |
|
358 |
+ { |
|
359 |
+ scale <- rep(scale, length(references)) |
|
360 |
+ } |
|
361 |
+ if ( length(references) != length(scale) ) |
|
362 |
+ { |
|
363 |
+ message('Number of references has to be equal to different |
|
364 |
+ scaling optios. Setting scale to 1. ') |
|
365 |
+ scale <- rep(1, length(references)) |
|
366 |
+ } |
|
367 |
+ if ( (!'' %in% outfiles) && (length(outfiles) != length(references) ) ) |
|
368 |
+ { |
|
369 |
+ message('Number of outfiles should be equal to number of references.') |
|
370 |
+ return(NULL) |
|
371 |
+ } |
|
372 |
+ if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) } |
|
373 |
+ |
|
374 |
+ for ( i in seq_len(length(references)) ) |
|
375 |
+ { |
|
376 |
+ .subpathwayVisualization(DEsubs.out=DEsubs.out, |
|
377 |
+ reference=references[i], |
|
378 |
+ submethod=submethod, |
|
379 |
+ subname=subname, |
|
380 |
+ colors=colors, |
|
381 |
+ scale=scale[i], |
|
382 |
+ shuffleColors=shuffleColors, |
|
383 |
+ outfile=outfiles[i], |
|
384 |
+ export=export, |
|
385 |
+ verbose=verbose |
|
386 |
+ ) |
|
387 |
+ } |
|
388 |
+} |
|
389 |
+ |
|
390 |
+.subpathwayVisualization <- function( DEsubs.out, reference, submethod, |
|
391 |
+ subname, colors, scale, shuffleColors, |
|
392 |
+ outfile, export, verbose) |
|
393 |
+{ |
|
394 |
+ |
|
395 |
+ org <- DEsubs.out[['org']] |
|
396 |
+ edgeList <- DEsubs.out[['edgeList']] |
|
397 |
+ |
|
398 |
+ if ( org != 'hsa' ) |
|
399 |
+ { return(NULL) } # Homo sapiens only |
|
400 |
+ if ( is.null(submethod) ) |
|
401 |
+ { return(NULL) } |
|
402 |
+ |
|
403 |
+ |
|
404 |
+ supportedMethods <- subpathwayTypes() |
|
405 |
+ supportedReferences <- .getExternalMeasures() |
|
406 |
+ supportedReferences <- c(supportedReferences, 'GO', 'Disease', 'Regulator') |
|
407 |
+ |
|
408 |
+ if (verbose) |
|
409 |
+ { |
|
410 |
+ message('Generating subpathway-level view ( ', reference,' )...', |
|
411 |
+ appendLF = FALSE) |
|
412 |
+ } |
|
413 |
+ |
|
414 |
+ unsupportedOptions <- submethod[!submethod %in% supportedMethods] |
|
415 |
+ if ( length(unsupportedOptions) > 0 ) |
|
416 |
+ { |
|
417 |
+ message('Option(s) ', unsupportedOptions, ' not supported.') |
|
418 |
+ return(NULL) |
|
419 |
+ } |
|
420 |
+ unsupportedReferences <- reference[!reference %in% supportedReferences] |
|
421 |
+ if ( length(unsupportedReferences) > 0 ) |
|
422 |
+ { |
|
423 |
+ message('Reference(s) ', unsupportedReferences, ' not supported.') |
|
424 |
+ return(NULL) |
|
425 |
+ } |
|
426 |
+ |
|
427 |
+ |
|
428 |
+ # Subpathway information extraction |
|
429 |
+ submethodName <- paste0('subAnalysis.', submethod) |
|
430 |
+ subpathway.nodelist <- DEsubs.out[[submethodName]][[subname]] |
|
431 |
+ |
|
432 |
+ # Subpathway (nodelist) to edgelist |
|
433 |
+ idx1 <- which(edgeList[, 1] %in% subpathway.nodelist) |
|
434 |
+ idx2 <- which(edgeList[, 2] %in% subpathway.nodelist) |
|
435 |
+ idx <- intersect(idx1, idx2) |
|
436 |
+ subpathway.edgelist <- edgeList[idx, ] |
|
437 |
+ |
|
438 |
+ # Change annotation from entrez to HGNC |
|
439 |
+ subpathway.nodelist <- .changeAnnotation(annData=subpathway.nodelist, |
|
440 |
+ org='hsa', choice='entrezToHGNC') |
|
441 |
+ subpathway.nodelist <- unname(subpathway.nodelist) |
|
442 |
+ |
|
443 |
+ |
|
444 |
+ # Special care for double and triple references |
|
445 |
+ references <- reference |
|
446 |
+ top <- 30 |
|
447 |
+ agap <- 0; bgap <- 0; cgap <- 50 |
|
448 |
+ if ( reference %in% 'GO' ) |
|
449 |
+ { |
|
450 |
+ references <- c('GO_bp', |
|
451 |
+ 'GO_cc', |
|
452 |
+ 'GO_mf') |
|
453 |
+ agap <- 20; bgap <- 20; cgap <- 50 |
|
454 |
+ } |
|
455 |
+ if ( reference %in% 'Disease' ) |
|
456 |
+ { |
|
457 |
+ references <- c('Disease_OMIM', 'Disease_GAD') |
|
458 |
+ agap <- 20; bgap <- 0; cgap <- 50 |
|
459 |
+ } |
|
460 |
+ if ( reference %in% 'Regulator' ) |
|
461 |
+ { |
|
462 |
+ references <- c('miRNA', 'TF') |
|
463 |
+ agap <- 20; bgap <- 0; cgap <- 50 |
|
464 |
+ } |
|
465 |
+ top <- floor(top/length(references)) |
|
466 |
+ |
|
467 |
+ edgeLists <- list() |
|
468 |
+ for ( i in seq_len(length(references)) ) |
|
469 |
+ { |
|
470 |
+ subpathway.edgelist <- .subLevelMeasures( |
|
471 |
+ subpathway=subpathway.nodelist, |
|
472 |
+ type=references[i], |
|
473 |
+ topTerms=top) |
|
474 |
+ if ( !is.null(subpathway.edgelist) ) |
|
475 |
+ { |
|
476 |
+ if ( top > nrow(subpathway.edgelist) ) |
|
477 |
+ { top <- nrow(subpathway.edgelist) } |
|
478 |
+ subpathway.edgelist <- subpathway.edgelist[1:top, , drop=FALSE] |
|
479 |
+ |
|
480 |
+ R <- list(subpathway.edgelist) |
|
481 |
+ names(R) <- references[i] |
|
482 |
+ edgeLists <- c(edgeLists, R) |
|
483 |
+ } |
|
484 |
+ } |
|
485 |
+ |
|
486 |
+ if ( length(edgeLists) == 0 ) |
|
487 |
+ { |
|
488 |
+ if (verbose) { message('done.') } |
|
489 |
+ return(NULL) |
|
490 |
+ } |
|
491 |
+ |
|
492 |
+ # Merge rows and columns indepedently for each edgelist. |
|
493 |
+ rowTerms <- c(); colTerms <- c() |
|
494 |
+ for ( i in seq_len(length(edgeLists)) ) |
|
495 |
+ { |
|
496 |
+ if ( is.null(edgeLists[[i]]) ) { next() } |
|
497 |
+ rowTerms <- unique(c(rowTerms, edgeLists[[i]][, 1])) |
|
498 |
+ colTerms <- unique(c(colTerms, edgeLists[[i]][, 2])) |
|
499 |
+ } |
|
500 |
+ |
|
501 |
+ |
|
502 |
+ edgeList <- do.call(rbind, edgeLists) |
|
503 |
+ adjmat <- matrix(0, nrow=length(rowTerms), ncol=length(colTerms)) |
|
504 |
+ rownames(adjmat) <- rowTerms |
|
505 |
+ colnames(adjmat) <- colTerms |
|
506 |
+ for ( i in seq_len(nrow(edgeList)) ) |
|
507 |
+ { |
|
508 |
+ adjmat[ edgeList[i,1], edgeList[i,2] ] <- 1 |
|
509 |
+ } |
|
510 |
+ |
|
511 |
+ if ( shuffleColors ) |
|
512 |
+ { colors <- sample(colors, nrow(adjmat), replace=TRUE) } |
|
513 |
+ if ( !shuffleColors ) |
|
514 |
+ { colors <- rep(colors, length.out=nrow(adjmat)) } |
|
515 |
+ |
|
516 |
+ |
|
517 |
+ # The user has supplied a custom directory |
|
518 |
+ if ( (outfile != '') && ('pdf' %in% export) ) |
|
519 |
+ { |
|
520 |
+ .dir.create.rec(outfile) |
|
521 |
+ } |
|
522 |
+ # No custom directory has been supplied |
|
523 |
+ if ( (outfile == '') && ('pdf' %in% export) ) |
|
524 |
+ { |
|
525 |
+ dir.create(cache[['outDir']], showWarnings=FALSE, recursive=TRUE) |
|
526 |
+ outfile <- paste0(cache[['outDir']] , '//circos_', reference ,'.pdf') |
|
527 |
+ } |
|
528 |
+ |
|
529 |
+ .doCirclize( mat=adjmat, |
|
530 |
+ colors=colors, |
|
531 |
+ agap=agap, |
|
532 |
+ bgap=bgap, |
|
533 |
+ cgap=cgap, |
|
534 |
+ degree=250, |
|
535 |
+ a=round(10/scale)/10, |
|
536 |
+ b=round(10/scale)/10, |
|
537 |
+ outfile=outfile, |
|
538 |
+ export=export) |
|
539 |
+ |
|
540 |
+ if (verbose) { message('done.') } |
|
541 |
+} |
|
542 |
+ |
|
543 |
+ |
|
544 |
+# |
|
545 |
+# Organism level measures |
|
546 |
+# |
|
547 |
+organismVisualization <- function( DEsubs.out, |
|
548 |
+ references='', |
|
549 |
+ topSubs=10, |
|
550 |
+ topTerms=20, |
|
551 |
+ colors=colorRampPalette(c("white", "red"))(100), |
|
552 |
+ export='pdf', |
|
553 |
+ width=7, |
|
554 |
+ height=6, |
|
555 |
+ outfiles='', |
|
556 |
+ verbose=TRUE ) |
|
557 |
+{ |
|
558 |
+ # Homo sapiens only |
|
559 |
+ org <- DEsubs.out[['org']] |
|
560 |
+ if (org != 'hsa') { return(NULL) } |
|
561 |
+ |
|
562 |
+ if ( 'all' %in% references ) |
|
563 |
+ { |
|
564 |
+ references <- .getExternalMeasures() |
|
565 |
+ } |
|
566 |
+ |
|
567 |
+ if ( (!'' %in% outfiles) && (length(outfiles) != length(references) ) ) |
|
568 |
+ { |
|
569 |
+ message('Number of outfiles should be equal to number of references.') |
|
570 |
+ return(NULL) |
|
571 |
+ } |
|
572 |
+ if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) } |
|
573 |
+ |
|
574 |
+ for ( i in seq_len(length(references)) ) |
|
575 |
+ { |
|
576 |
+ res <- .organismVisualization(DEsubs.out=DEsubs.out, |
|
577 |
+ references=references[i], |
|
578 |
+ topSubs=topSubs, |
|
579 |
+ topTerms=topTerms, |
|
580 |
+ colors=colors, |
|
581 |
+ export=export, |
|
582 |
+ width=width, |
|
583 |
+ height=height, |
|
584 |
+ outfile=outfiles[i], |
|
585 |
+ verbose=verbose) |
|
586 |
+ } |
|
587 |
+ |
|
588 |
+ |
|
589 |
+ return(invisible()) |
|
590 |
+} |
|
591 |
+ |
|
592 |
+.organismVisualization <- function( DEsubs.out, references, topSubs, topTerms, |
|
593 |
+ colors, export, width, height, |
|
594 |
+ outfile, verbose ) |
|
595 |
+{ |
|
596 |
+ |
|
597 |
+ org <- DEsubs.out[['org']] |
|
598 |
+ type <- references |
|
599 |
+ supportedTerms <- .getExternalMeasures() |
|
600 |
+ |
|
601 |
+ if (verbose) |
|
602 |
+ { |
|
603 |
+ message(paste0('Generating organism-level view ( ', type , ' )...'), |
|
604 |
+ appendLF = FALSE) |
|
605 |
+ } |
|
606 |
+ |
|
607 |
+ failType <- type[!type %in% supportedTerms] |
|
608 |
+ if ( length(failType) > 0 ) |
|
609 |
+ { |
|
610 |
+ message('Option(s) ', failType, ' not supported.') |
|
611 |
+ return(invisible()) |
|
612 |
+ } |
|
613 |
+ |
|
614 |
+ subpathways <- DEsubs.out[[5]] |
|
615 |
+ termsPerSub <- list() |
|
616 |
+ pValuesPerTermPerSub <- list() |
|
617 |
+ pathNames <- c() |
|
618 |
+ |
|
619 |
+ |
|
620 |
+ top <- c(topSubs, topTerms) |
|
621 |
+ names(top) <- c('subpathways', 'terms') |
|
622 |
+ |
|
623 |
+ if ( length(subpathways) < top['subpathways'] ) |
|
624 |
+ { top['subpathways'] <- length(subpathways) } |
|
625 |
+ |
|
626 |
+ subpathways <- subpathways[1:top['subpathways']] |
|
627 |
+ subpathways <- subpathways[!is.na(subpathways)] |
|
628 |
+ offset <- 0.001 |
|
629 |
+ |
|
630 |
+ # Find most significant terms for each subpathway |
|
631 |
+ for ( i in seq_len(length(subpathways)) ) |
|
632 |
+ { |
|
633 |
+ sub <- unname(.changeAnnotation(annData=subpathways[[i]], org=org, |
|
634 |
+ choice='entrezToHGNC')) |
|
635 |
+ edgeList <- .subLevelMeasures(subpathway=sub, type=type, |
|
636 |
+ topTerms=top['subpathways']) |
|
637 |
+ if ( !is.null(edgeList) ) |
|
638 |
+ { |
|
639 |
+ termsPerSub[[i]] <- matrix( edgeList[, 1], nrow=1 ) |
|
640 |
+ pval <- matrix( as.numeric(edgeList[, 3]) + offset, nrow=1 ) |
|
641 |
+ pValuesPerTermPerSub[[i]] <- pval |
|
642 |
+ pathNames <- c(pathNames, names(subpathways)[i] ) |
|
643 |
+ } |
|
644 |
+ } |
|
645 |
+ |
|
646 |
+ if ( length(termsPerSub) == 0) |
|
647 |
+ { |
|
648 |
+ message('done', appendLF = TRUE) |
|
649 |
+ return(NULL) |
|
650 |
+ } |
|
651 |
+ |
|
652 |
+ # Convert lists to adjacency matrices |
|
653 |
+ names(termsPerSub) <- pathNames |
|
654 |
+ termsPerSub <- .unlistToMatrix(.fillMatrixList( termsPerSub ) ) |
|
655 |
+ names(pValuesPerTermPerSub) <- pathNames |
|
656 |
+ pValuesPerTermPerSub <- .fillMatrixList( pValuesPerTermPerSub ) |
|
657 |
+ pValuesPerTermPerSub <- .unlistToMatrix( pValuesPerTermPerSub ) |
|
658 |
+ |
|
659 |
+ # Find topmost terms amongst the terms for all subpathways |
|
660 |
+ uniqueTerms <- unique(as.vector(termsPerSub)) |
|
661 |
+ uniqueTerms <- uniqueTerms[uniqueTerms != '0'] |
|
662 |
+ counts <- vector(mode='numeric', length=length(uniqueTerms)) |
|
663 |
+ for ( i in seq_len(length(uniqueTerms)) ) |
|
664 |
+ { |
|
665 |
+ counts[i] <- length(which(as.vector(termsPerSub) == uniqueTerms[i])) |
|
666 |
+ } |
|
667 |
+ names(counts) <- uniqueTerms |
|
668 |
+ topTerms <- names(head(sort(counts, decreasing=TRUE), top['terms'])) |
|
669 |
+ |
|
670 |
+ # Keep only topmost terms for each subpathway |
|
671 |
+ idx <- termsPerSub %in% topTerms |
|
672 |
+ if ( length(idx) > 0 ) { termsPerSub[!idx] <- NA } |
|
673 |
+ |
|
674 |
+ # Create an edgelist of topmost terms for all subpathways |
|
675 |
+ termsPerSub.edgeList <- matrix(, nrow=length(termsPerSub), ncol=3) |
|
676 |
+ for ( i in seq_len(nrow(termsPerSub)) ) |
|
677 |
+ { |
|
678 |
+ if ( is.na(rownames(termsPerSub)[i]) ) { next() } |
|
679 |
+ |
|
680 |
+ start <- 1 + (i-1)*ncol(termsPerSub) |
|
681 |
+ end <- i*ncol(termsPerSub) |
|
682 |
+ data1 <- expand.grid( rownames(termsPerSub)[i], termsPerSub[i, ] ) |
|
683 |
+ data2 <- expand.grid( rownames(termsPerSub)[i], |
|
684 |
+ pValuesPerTermPerSub[i, ] - offset ) |
|
685 |
+ |
|
686 |
+ data.all <- cbind(data1, data2[, 2]) |
|
687 |
+ termsPerSub.edgeList[start:end, ] <- as.matrix(data.all) |
|
688 |
+ } |
|
689 |
+ |
|
690 |
+ # Remove NA's |
|
691 |
+ idx <- which(is.na(termsPerSub.edgeList[, 2])) |
|
692 |
+ if ( length(idx) > 0 ) |
|
693 |
+ { |
|
694 |
+ termsPerSub.edgeList <- termsPerSub.edgeList[-idx, , drop=FALSE] |
|
695 |
+ } |
|
696 |
+ |
|
697 |
+ termsPerSub.df <- as.data.frame(termsPerSub.edgeList, |
|
698 |
+ stringsAsFactors=FALSE) |
|
699 |
+ colnames(termsPerSub.df) <- c('Subpathway', 'Term', 'pValue') |
|
700 |
+ |
|
701 |
+ termsPerSub.df[, 'pValue'] <- as.numeric(termsPerSub.df[, 'pValue']) |
|
702 |
+ |
|
703 |
+ |
|
704 |
+ .matrixVisualization(dataVis=termsPerSub.df, type='dotplot', |
|
705 |
+ title=c('Subpathway', type, 'Q-values'), |
|
706 |
+ colors=colors, |
|
707 |
+ width=width, height=height, |
|
708 |
+ outfile=outfile, |
|
709 |
+ export=export) |
|
710 |
+ |
|
711 |
+ if (verbose) |
|
712 |
+ { message('done', appendLF = TRUE) } |
|
713 |
+ |
|
714 |
+ |
|
715 |
+ return(termsPerSub.edgeList) |
|
716 |
+} |
|
717 |
+ |
|
718 |
+ |
0 | 719 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,281 @@ |
1 |
+DEsubs <- function( org, mRNAexpr, mRNAnomenclature, pathways, |
|
2 |
+ DEtool, DEpar, CORtool, CORpar, subpathwayType, |
|
3 |
+ rankedList=NULL, verbose=TRUE) |
|
4 |
+{ |
|
5 |
+ # Create the adjacency matrix |
|
6 |
+ dataNet <- .constructNetwork(org=org, |
|
7 |
+ mRNAexpr=mRNAexpr, |
|
8 |
+ mRNAnomenclature=mRNAnomenclature, |
|
9 |
+ pathways=pathways) |
|
10 |
+ mRNAexpr <- dataNet[['mRNAexpr']] |
|
11 |
+ edgeList <- dataNet[['edgeList']] |
|
12 |
+ |
|
13 |
+ # Filter the nodes and the edges of the organism's pathways network |
|
14 |
+ lens <- suppressWarnings(split(1:ncol(mRNAexpr), 1:2)) |
|
15 |
+ lens <- unname(sapply(lens, function(x) { length(x) })) |
|
16 |
+ net <- .pruneNetwork( edgeList=edgeList, |
|
17 |
+ mRNAexpr=mRNAexpr, |
|
18 |
+ DEGchoice=DEtool, |
|
19 |
+ DEGthresh=DEpar, |
|
20 |
+ classes=c(rep(1, lens[1]), rep(2, lens[2]) ), |
|
21 |
+ corr_threshold=CORpar, org=org, |
|
22 |
+ CORtool=CORtool, |
|
23 |
+ rankedList=rankedList, |
|
24 |
+ verbose=verbose) |
|
25 |
+ |
|
26 |
+ edgeList <- net[['edgeList']] |
|
27 |
+ DEgenes <- net[['DEgenes']] |
|
28 |
+ output <- list('org'=org, 'mRNAnomenclature'=mRNAnomenclature, |
|
29 |
+ 'edgeList'=edgeList, 'DEgenes'=DEgenes) |
|
30 |
+ |
|
31 |
+ # Choose specific subpathways |
|
32 |
+ subTypes <- subpathwayTypes(grouping=subpathwayType) |
|
33 |
+ |
|
34 |
+ # Subpathway analysis |
|
35 |
+ for (subType in subTypes) |
|
36 |
+ { |
|
37 |
+ subs <- .subpathwayAnalysis( edgeList=edgeList, |
|
38 |
+ method=subType, |
|
39 |
+ DEgenes=DEgenes, |
|
40 |
+ org=org, |
|
41 |
+ verbose=verbose ) |
|
42 |
+ output <- c(output, subs) |
|
43 |
+ } |
|
44 |
+ |
|
45 |
+ return( output ) |
|
46 |
+} |
|
47 |
+ |
|
48 |
+ |
|
49 |
+.DEanalysis <- function( count.matrix, DEGchoice, classes ) |
|
50 |
+{ |
|
51 |
+ # |
|
52 |
+ # Differential expression analysis using various DE analysis tools from |
|
53 |
+ # |
|
54 |
+ # Soneson,C. and Delorenzi,M. (2013) A comparison of methods for |
|
55 |
+ # differential expression analysis of RNA-seq data. BMC bioinformatics, |
|
56 |
+ # 14(1), 1. |
|
57 |
+ # |
|
58 |
+ |
|
59 |
+ if ( missing(count.matrix) ) { message('Please supply a matrix.') } |
|
60 |
+ if ( missing(classes) ) { message('Please supply the classes.') } |
|
61 |
+ if ( missing(DEGchoice) ) { message('Please supply a type.') } |
|
62 |
+ |
|
63 |
+ supportedMethods <- c('edgeR', 'DESeq', 'EBSeq', 'samr', 'NBPSeq', |
|
64 |
+ 'voom+limma', 'vst+limma', 'TSPM') |
|
65 |
+ |
|
66 |
+ if ( DEGchoice == 'edgeR' ) |
|
67 |
+ { |
|
68 |
+ # run edgeR |
|
69 |
+ edgeR.dgelist <- DGEList(counts=count.matrix, group=factor(classes)) |
|
70 |
+ edgeR.dgelist <- calcNormFactors(edgeR.dgelist, method="TMM") |
|
71 |
+ edgeR.dgelist <- estimateCommonDisp(edgeR.dgelist) |
|
72 |
+ edgeR.dgelist <- estimateTagwiseDisp(edgeR.dgelist, trend="movingave") |
|
73 |
+ edgeR.test <- exactTest(edgeR.dgelist) |
|
74 |
+ edgeR.pvalues <- edgeR.test[['table']][['PValue']] |
|
75 |
+ genes <- rownames(edgeR.test[['table']]) |
|
76 |
+ edgeR.adjpvalues <- p.adjust(edgeR.pvalues, method="BH") |
|
77 |
+ adjpvalues <- edgeR.adjpvalues |
|
78 |
+ names(adjpvalues) <- genes |
|
79 |
+ |
|
80 |
+ return(adjpvalues) |
|
81 |
+ } |
|
82 |
+ if ( DEGchoice == 'DESeq' ) |
|
83 |
+ { |
|
84 |
+ # run DESeq |
|
85 |
+ DESeq.cds <- DESeq::newCountDataSet(countData=count.matrix, |
|
86 |
+ conditions=factor(classes)) |
|
87 |
+ DESeq.cds <- DESeq::estimateSizeFactors(DESeq.cds) |
|
88 |
+ DESeq.cds <- DESeq::estimateDispersions(DESeq.cds, |
|
89 |
+ sharingMode="maximum", method="pooled", |
|
90 |
+ fitType="local") |
|
91 |
+ DESeq.test <- nbinomTest(DESeq.cds, "1", "2") |
|
92 |
+ DESeq.pvalues <- DESeq.test[['pval']] |
|
93 |
+ genes <- DESeq.test[['id']] |
|
94 |
+ DESeq.adjpvalues <- p.adjust(DESeq.pvalues, method="BH") |
|
95 |
+ adjpvalues <- DESeq.adjpvalues |
|
96 |
+ names(adjpvalues) <- genes |
|
97 |
+ |
|
98 |
+ return(adjpvalues) |
|
99 |
+ } |
|
100 |
+ if ( DEGchoice == 'voom+limma' ) |
|
101 |
+ { |
|
102 |
+ # voom+limma |
|
103 |
+ nf <- calcNormFactors(count.matrix, method="TMM") |
|
104 |
+ voom.data <- voom( count.matrix, |
|
105 |
+ design=model.matrix(~factor(classes)), |
|
106 |
+ lib.size=colSums(count.matrix)*nf) |
|
107 |
+ voom.data[['genes']] <- rownames(count.matrix) |
|
108 |
+ voom.fitlimma <- lmFit( voom.data, |
|
109 |
+ design=model.matrix(~factor(classes))) |
|
110 |
+ voom.fitbayes <- eBayes(voom.fitlimma) |
|
111 |
+ voom.pvalues <- voom.fitbayes[['p.value']][, 2] |
|
112 |
+ voom.adjpvalues <- p.adjust(voom.pvalues, method="BH") |
|
113 |
+ voom.genes <- rownames(voom.fitbayes[['p.value']]) |
|
114 |
+ adjpvalues <- voom.adjpvalues |
|
115 |
+ names(adjpvalues) <- voom.genes |
|
116 |
+ |
|
117 |
+ return(adjpvalues) |
|
118 |
+ } |
|
119 |
+ if ( DEGchoice == 'samr' ) |
|
120 |
+ { |
|
121 |
+ # samr |
|
122 |
+ sink( tempfile() ) |
|
123 |
+ SAMseq.test <- suppressMessages(SAMseq(count.matrix, classes, |
|
124 |
+ resp.type="Two class unpaired", |
|
125 |
+ geneid = rownames(count.matrix), |
|
126 |
+ genenames = rownames(count.matrix), |
|
127 |
+ nperms = 100, nresamp = 20, fdr.output = 1)) |
|
128 |
+ SAMseq.result.table <- rbind( |
|
129 |
+ SAMseq.test[['siggenes.table']][['genes.up']], |
|
130 |
+ SAMseq.test[['siggenes.table']][['genes.lo']]) |
|
131 |
+ SAMseq.score <- rep(0, nrow(count.matrix)) |
|
132 |
+ idx <- match(SAMseq.result.table[,1], |
|
133 |
+ rownames(count.matrix)) |
|
134 |
+ SAMseq.score[idx] <- as.numeric(SAMseq.result.table[,3]) |
|
135 |
+ SAMseq.FDR <- rep(1, nrow(count.matrix)) |
|
136 |
+ idx <- match(SAMseq.result.table[,1], |
|
137 |
+ rownames(count.matrix)) |
|
138 |
+ SAMseq.FDR[idx] <- as.numeric(SAMseq.result.table[,5])/100 |
|
139 |
+ adjpvalues <- SAMseq.FDR |
|
140 |
+ genes <- SAMseq.result.table[, 'Gene ID'] |
|
141 |
+ names(adjpvalues) <- genes |
|
142 |
+ |
|
143 |
+ sink() |
|
144 |
+ |
|
145 |
+ return(adjpvalues) |
|
146 |
+ } |
|
147 |
+ if ( DEGchoice == 'EBSeq' ) |
|
148 |
+ { |
|
149 |
+ # run EBSeq |
|
150 |
+ sizes <- MedianNorm(count.matrix) |
|
151 |
+ EBSeq.test <- suppressMessages( EBTest(Data=count.matrix, |
|
152 |
+ Conditions=factor(classes), sizeFactors=sizes, |
|
153 |
+ maxround=10)) |
|
154 |
+ |
|
155 |
+ EBSeq.ppmat <- GetPPMat(EBSeq.test) |
|
156 |
+ EBSeq.probabilities.DE <- EBSeq.ppmat[, "PPDE"] |
|
157 |
+ EBSeq.lFDR <- 1 - EBSeq.ppmat[, "PPDE"] |
|
158 |
+ EBSeq.FDR <- rep(NA, length(EBSeq.lFDR)) |
|
159 |
+ names(EBSeq.FDR) <- names(EBSeq.lFDR) |
|
160 |
+ for (i in seq_len(length(EBSeq.lFDR)) ) |
|
161 |
+ { |
|
162 |
+ idx <- which(EBSeq.lFDR <= EBSeq.lFDR[i]) |
|
163 |
+ EBSeq.FDR[i] <- mean(EBSeq.lFDR[idx]) |
|
164 |
+ } |
|
165 |
+ adjpvalues <- EBSeq.FDR |
|
166 |
+ |
|
167 |
+ return(adjpvalues) |
|
168 |
+ } |
|
169 |
+ if ( DEGchoice == 'vst+limma' ) |
|
170 |
+ { |
|
171 |
+ # vst+limma |
|
172 |
+ DESeq.cds <- newCountDataSet( countData=count.matrix, |
|
173 |
+ conditions=factor(classes)) |
|
174 |
+ DESeq.cds <- estimateSizeFactors(DESeq.cds) |
|
175 |
+ DESeq.cds <- estimateDispersions( DESeq.cds, |
|
176 |
+ method="blind", fitType="local") |
|
177 |
+ DESeq.vst <- getVarianceStabilizedData(DESeq.cds) |
|
178 |
+ DESeq.vst.fitlimma <- lmFit( DESeq.vst, |
|
179 |
+ design=model.matrix(~factor(classes))) |
|
180 |
+ DESeq.vst.fitbayes <- eBayes(DESeq.vst.fitlimma) |
|
181 |
+ DESeq.vst.pvalues <- DESeq.vst.fitbayes[['p.value']][, 2] |
|
182 |
+ genes <- rownames(DESeq.vst.fitbayes[['p.value']]) |
|
183 |
+ DESeq.vst.adjpvalues <- p.adjust(DESeq.vst.pvalues, method="BH") |
|
184 |
+ adjpvalues <- DESeq.vst.adjpvalues |
|
185 |
+ names(adjpvalues) <- genes |
|
186 |
+ |
|
187 |
+ return(adjpvalues) |
|
188 |
+ } |
|
189 |
+ if ( DEGchoice == 'NBPSeq' ) |
|
190 |
+ { |
|
191 |
+ # NBPSeq |
|
192 |
+ NBPSeq.dgelist <- DGEList( counts=count.matrix, |
|
193 |
+ group=factor(classes)) |
|
194 |
+ NBPSeq.dgelist <- calcNormFactors(NBPSeq.dgelist, method="TMM") |
|
195 |
+ NBPSeq.norm.factors <- as.vector( |
|
196 |
+ NBPSeq.dgelist[['samples']][['norm.factors']]) |
|
197 |
+ |
|
198 |
+ capture.output(NBPSeq.test <- nbp.test(counts=count.matrix, |
|
199 |
+ grp.ids=classes, grp1=1,grp2=2, norm.factors=NBPSeq.norm.factors)) |
|
200 |
+ NBPSeq.pvalues <- NBPSeq.test[['p.values']] |
|
201 |
+ NBPSeq.adjpvalues <- NBPSeq.test[['q.values']] |
|
202 |
+ adjpvalues <- NBPSeq.adjpvalues |
|
203 |
+ names(adjpvalues) <- rownames(NBPSeq.test[['counts']]) |
|
204 |
+ |
|
205 |
+ return(adjpvalues) |
|
206 |
+ } |
|
207 |
+ if ( DEGchoice == 'TSPM' ) |
|
208 |
+ { |
|
209 |
+ TSPM.dgelist <- DGEList(counts = count.matrix, group = factor(classes)) |
|
210 |
+ TSPM.dgelist <- calcNormFactors(TSPM.dgelist, method = "TMM") |
|
211 |
+ v1 <- as.vector(TSPM.dgelist[['samples']][['norm.factors']]) |
|
212 |
+ v2 <- as.vector(TSPM.dgelist[['samples']][['lib.size']]) |
|
213 |
+ norm.lib.sizes <- v1 * v2 |
|
214 |
+ TSPM.test <- TSPM(counts = count.matrix, x1 = factor(classes), |
|
215 |
+ x0 = rep(1, length(classes)), lib.size = norm.lib.sizes) |
|
216 |
+ TSPM.pvalues <- TSPM.test[['pvalues']] |
|
217 |
+ TSPM.adjpvalues <- TSPM.test[['padj']] |
|
218 |
+ adjpvalues <- TSPM.adjpvalues |
|
219 |
+ genes <- rownames(TSPM.dgelist[['counts']]) |
|
220 |
+ names(adjpvalues) <- genes |
|
221 |
+ |
|
222 |
+ return(adjpvalues) |
|
223 |
+ } |
|
224 |
+ if ( DEGchoice == 'DESeq2' ) |
|
225 |
+ { |
|
226 |
+ # run DESeq2 |
|
227 |
+ sink(tempfile()) |
|
228 |
+ DESeq2.ds <- DESeq2::DESeqDataSetFromMatrix(countData = count.matrix, |
|
229 |
+ colData = data.frame(condition = factor(classes)), |
|
230 |
+ design = ~ condition) |
|
231 |
+ DESeq2.ds <- DESeq2::estimateSizeFactors( DESeq2.ds ) |
|
232 |
+ DESeq2.ds <- DESeq2::estimateDispersions( DESeq2.ds, fitType="local") |
|
233 |
+ DESeq2.ds <- DESeq2::nbinomWaldTest( DESeq2.ds ) |
|
234 |
+ DESeq2.results <- DESeq2::results(DESeq2.ds ) |
|
235 |
+ adjpvalues <- p.adjust(DESeq2.results[['pvalue']], method="BH") |
|
236 |
+ names(adjpvalues) <- rownames(count.matrix) |
|
237 |
+ sink() |
|
238 |
+ |
|
239 |
+ return(adjpvalues) |
|
240 |
+ } |
|
241 |
+ if ( DEGchoice == 'vst2+limma' ) |
|
242 |
+ { |
|
243 |
+ # vst(DESeq2)+limma |
|
244 |
+ |
|
245 |
+ sink(tempfile()) |
|
246 |
+ DESeq2.ds <- DESeq2::DESeqDataSetFromMatrix(countData = count.matrix, |
|
247 |
+ colData = data.frame(condition = factor(classes)), |
|
248 |
+ design = ~ condition) |
|
249 |
+ DESeq2.ds <- DESeq2::estimateSizeFactors( DESeq2.ds ) |
|
250 |
+ DESeq2.ds <- DESeq2::estimateDispersions( DESeq2.ds, fitType="local") |
|
251 |
+ DESeq2.vst <- DESeq2::getVarianceStabilizedData( DESeq2.ds ) |
|
252 |
+ |
|
253 |
+ DESeq2.vst.fitlimma <- lmFit( DESeq2.vst, |
|
254 |
+ design=model.matrix(~factor(classes))) |
|
255 |
+ DESeq2.vst.fitbayes <- eBayes(DESeq2.vst.fitlimma) |
|
256 |
+ DESeq2.vst.pvalues <- DESeq2.vst.fitbayes[['p.value']][, 2] |
|
257 |
+ genes <- rownames(DESeq2.vst.fitbayes[['p.value']]) |
|
258 |
+ DESeq2.vst.adjpvalues <- p.adjust(DESeq2.vst.pvalues, method="BH") |
|
259 |
+ adjpvalues <- DESeq2.vst.adjpvalues |
|
260 |
+ names(adjpvalues) <- genes |
|
261 |
+ sink() |
|
262 |
+ |
|
263 |
+ return(adjpvalues) |
|
264 |
+ } |
|
265 |
+ if ( !is.null(DEGchoice) ) |
|
266 |
+ { |
|
267 |
+ unsupportedOptions <- DEGchoice[!DEGchoice %in% supportedMethods] |
|
268 |
+ if ( length(unsupportedOptions) > 0 ) |
|
269 |
+ { |
|
270 |
+ message('Option ', unsupportedOptions, ' not supported.') |
|
271 |
+ message('Supported options are ', |
|
272 |
+ paste0(supportedMethods, collapse=', '), '.') |
|
273 |
+ return( NULL ) |
|
274 |
+ } |
|
275 |
+ } |
|
276 |
+ |
|
277 |
+ |
|
278 |
+ |
|
279 |
+ return( NULL ) |
|
280 |
+} |
|
281 |
+ |
0 | 282 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,133 @@ |
1 |
+##------------------------------------------------------------------- |
|
2 |
+## Name: TSPM.R |
|
3 |
+## R code for the paper by Paul L. Auer and R.W. Doerge: |
|
4 |
+## "A Two-Stage Poisson Model for Testing RNA-Seq Data" |
|
5 |
+## Date: February 2011 |
|
6 |
+## Contact: Paul Auer plivermo@fhcrc.org |
|
7 |
+## R.W. Doerge doerge@purdue.edu |
|
8 |
+ |
|
9 |
+ |
|
10 |
+## Example: |
|
11 |
+## counts <- matrix(0, nrow=1000, ncol=10) |
|
12 |
+## for(i in 1:1000){ |
|
13 |
+## lambda <- rpois(n=1, lambda=10) |
|
14 |
+## counts[i,] <- rpois(n=10, lambda=lambda) |
|
15 |
+## } |
|
16 |
+## x1 <- gl(n=2, k=5, labels=c("T", "C")) |
|
17 |
+## x0 <- rep(1, times=10) |
|
18 |
+## lib.size <- apply(counts,2,sum) |
|
19 |
+## result <- TSPM(counts, x1, x0, lib.size) |
|
20 |
+##--------------------------------------------------------------------- |
|
21 |
+ |
|
22 |
+ |
|
23 |
+####################################################################### |
|
24 |
+###### The TSPM function ############################################## |
|
25 |
+####################################################################### |
|
26 |
+ |
|
27 |
+ |
|
28 |
+TSPM <- function(counts, x1, x0, lib.size, alpha.wh=0.05){ |
|
29 |
+ |
|
30 |
+## Input: |
|
31 |
+#counts: a matrix of RNA-Seq gene counts (genes are rows, samples are |
|
32 |
+# columns) |
|
33 |
+#x1: a vector of treatment group factors (under the alternative |
|
34 |
+# hypothesis) |
|
35 |
+#x0: a vector of treatment group factors (under the null hypothesis) |
|
36 |
+#lib.size: a vector of RNA-Seq library sizes. This could simply be obtained |
|
37 |
+# by specifying lib.size <- apply(counts,2,sum). It may also be any |
|
38 |
+# other appropriate scaling factor. |
|
39 |
+#alpha.wh: the significance threshold to use for deciding whether a gene is |
|
40 |
+# overdispersed. |
|
41 |
+# Defaults to 0.05. |
|
42 |
+ |
|
43 |
+ |
|
44 |
+## Output: |
|
45 |
+#log.fold.change: a vector containing the estimated log fold changes for |
|
46 |
+# each gene |
|
47 |
+#pvalues: a vector containing the raw p-values testing differential |
|
48 |
+# expression for each gene. |
|
49 |
+#index.over.disp: a vector of integer values containing the indices of the |
|
50 |
+# over-dispersed genes. |
|
51 |
+#index.not.over.disp: a vector of integer values containing the indices of the |
|
52 |
+# non-over-dispersed genes. |
|
53 |
+#padj: a vector containing the p-values after adjusting for |
|
54 |
+# multiple testing using the |
|
55 |
+# method of Benjamini-Hochberg |
|
56 |
+ |
|
57 |
+ |
|
58 |
+######## The main loop that fits the GLMs to each gene ##################### |
|
59 |
+ |
|
60 |
+### Initializing model parameters #### |
|
61 |
+n <- dim(counts)[1] |
|
62 |
+per.gene.disp <- NULL |
|
63 |
+LRT <- NULL |
|
64 |
+score.test <- NULL |
|
65 |
+LFC <- NULL |
|
66 |
+ |
|
67 |
+###### Fitting the GLMs for each gene ################# |
|
68 |
+ for(i in 1:n){ |
|
69 |
+ ### Fit full and reduced models ### |
|
70 |
+ model.1 <- glm(as.numeric(counts[i,]) ~ x1, offset=log(lib.size), |
|
71 |
+ family=poisson) |
|
72 |
+ model.0 <- glm(as.numeric(counts[i,]) ~ x0, offset=log(lib.size), |
|
73 |
+ family=poisson) |
|
74 |
+ |
|
75 |
+ ### Obtain diagonals of Hat matrix from the full model fit ### |
|
76 |
+ hats <- hatvalues(model.1) |
|
77 |
+ |
|
78 |
+ ### Obtain Pearson overdispersion estimate #### |
|
79 |
+ per.gene.disp[i] <- sum(residuals(model.1, |
|
80 |
+ type="pearson")^2)/model.1$df.residual |
|
81 |
+ |
|
82 |
+ ### Obtain Likelihood ratio statistic #### |
|
83 |
+ LRT[i] <- deviance(model.0)-deviance(model.1) |
|
84 |
+ |
|
85 |
+ ### Obtain score test statistic #### |
|
86 |
+ score.test[i] <- 1/(2*length(counts[i,])) * sum(residuals(model.1, |
|
87 |
+ type="pearson")^2 - ((counts[i,] - |
|
88 |
+ hats*model.1$fitted.values)/model.1$fitted.values))^2 |
|
89 |
+ |
|
90 |
+ ### Obtain the estimated log fold change ### |
|
91 |
+ LFC[i] <- -model.1$coef[2] |
|
92 |
+ } |
|
93 |
+ |
|
94 |
+## Initialize parameters for Working-Hotelling bands around the score TSs ### |
|
95 |
+qchi <- qchisq(df=1, (1:n-0.5)/n) |
|
96 |
+MSE <- 2 |
|
97 |
+UL <- NULL |
|
98 |
+ |
|
99 |
+#### Obtain the upper boundary of the WH bands ############################ |
|
100 |
+xbar <- mean(qchi) |
|
101 |
+bottom <- sum((qchi-xbar)^2) |
|
102 |
+top <- (qchi-xbar)^2 |
|
103 |
+s <- sqrt(MSE*(1/n) + (top/bottom)) |
|
104 |
+W <- sqrt(2*qf(df1=1, df2=n-1, p=1-(alpha.wh/n))) |
|
105 |
+UL <- pmax(qchi + W*s,1) |
|
106 |
+ |
|
107 |
+###### Obtain the indices of the over-dispersed and not-over-dispersed genes, |
|
108 |
+# respectively ########## |
|
109 |
+ |
|
110 |
+cutoff <- min(which(sort(score.test)-UL > 0)) |
|
111 |
+temp <- cutoff-1 + seq(cutoff:length(score.test)) |
|
112 |
+over.disp <- which(score.test %in% sort(score.test)[temp]) |
|
113 |
+not.over.disp <- setdiff(1:length(score.test), over.disp) |
|
114 |
+ |
|
115 |
+###### Compute p-values #################################### |
|
116 |
+p.f <- pf(LRT[over.disp]/per.gene.disp[over.disp], df1=1, |
|
117 |
+ df2=model.1$df.residual, lower.tail=FALSE) |
|
118 |
+p.chi <- pchisq(LRT[not.over.disp], df=1, lower.tail=FALSE) |
|
119 |
+p <- NULL |
|
120 |
+p[over.disp] <- p.f |
|
121 |
+p[not.over.disp] <- p.chi |
|
122 |
+ |
|
123 |
+##### Adjust the p-values using the B-H method #################### |
|
124 |
+p.bh.f <- p.adjust(p.f, method="BH") |
|
125 |
+p.bh.chi <- p.adjust(p.chi, method="BH") |
|
126 |
+final.p.bh.tagwise <- NULL |
|
127 |
+final.p.bh.tagwise[over.disp] <- p.bh.f |
|
128 |
+final.p.bh.tagwise[not.over.disp] <- p.bh.chi |
|
129 |
+ |
|
130 |
+### Output ### |
|
131 |
+list(log.fold.change=LFC, pvalues=p, index.over.disp=over.disp, |
|
132 |
+ index.not.over.disp=not.over.disp, padj=final.p.bh.tagwise) |
|
133 |
+} |
0 | 134 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,210 @@ |
1 |
+.constructNetwork <- function( org, mRNAexpr, mRNAnomenclature='entrezgene', |
|
2 |
+ pathways='all') |
|
3 |
+{ |
|
4 |
+ # If a filename is given, read a text file from the 'User' directory. |
|
5 |
+ if ( class(mRNAexpr) == 'character' ) |
|
6 |
+ { |
|
7 |
+ dir <- paste0(cache[['baseDir']], '//User') |
|
8 |
+ mRNAexpr <- readLines(paste(dir, mRNAexpr, sep='//')) |
|
9 |
+ mRNAexpr <- strsplit(mRNAexpr, '\t') |
|
10 |
+ mRNAexpr <- do.call('rbind', mRNAexpr) |
|
11 |
+ rownames(mRNAexpr) <- mRNAexpr[, 1] |
|
12 |
+ mRNAexpr <- mRNAexpr[, -1] |
|
13 |
+ class(mRNAexpr) <- 'numeric' |
|
14 |
+ } |
|
15 |
+ |
|
16 |
+ # Change nomenclature if necessary |
|
17 |
+ |
|
18 |
+ if ( mRNAnomenclature != 'entrezgene' ) |
|
19 |
+ { |
|
20 |
+ file <- 'extdata//Data//libraryEntrezToExternalNomenclature.RData' |
|
21 |
+ file <- system.file(file, package='DEsubs') |
|
22 |
+ load(file, e <- new.env()) |
|
23 |
+ orgLib <- e[['libraryEntrezToExternalNomenclature']][[org]] |
|
24 |
+ lib <- orgLib[[mRNAnomenclature]] |
|
25 |
+ |
|
26 |
+ # Keep rows with entrez ids |
|
27 |
+ idx <- which( as.character(rownames(mRNAexpr)) %in% lib[, 2] ) |
|
28 |
+ mRNAexpr <- mRNAexpr[idx, ] |
|
29 |
+ |
|
30 |
+ # Convert to entrez ids |
|
31 |
+ xlib <- lib[, 1] |
|
32 |
+ names(xlib) <- lib[, 2] |
|
33 |
+ rownames(mRNAexpr) <- xlib[rownames(mRNAexpr)] |
|
34 |
+ } |
|
35 |
+ |
|
36 |
+ # Create pathway graph |
|
37 |
+ file <- system.file('extdata//Data//edgeLists.RData', package='DEsubs') |
|
38 |
+ load(file, e <- new.env()) |
|
39 |
+ edgeList <- e[['edgeLists']][[org]] |
|
40 |
+ |
|
41 |
+ # Filter edgelist according to pathway type |
|
42 |
+ idx <- sapply(edgeList, is.factor) |
|
43 |
+ edgeList[idx] <- lapply(edgeList[idx], as.character) |
|
44 |
+ |
|
45 |
+ pathIds <- as.numeric(edgeList[, 4]) |
|
46 |
+ if ( pathways == 'Metabolic') |
|
47 |
+ { edgeList <- edgeList[which(pathIds < 2000), ] } |
|
48 |
+ if ( pathways == 'Non-Metabolic') |
|
49 |
+ { edgeList <- edgeList[which(pathIds >= 2000), ] } |
|
50 |
+ |
|
51 |
+ # Filter edgelist with gene within the RNA-seq data |
|
52 |
+ idx1 <- which(edgeList[, 1] %in% rownames(mRNAexpr)) |
|
53 |
+ idx2 <- which(edgeList[, 2] %in% rownames(mRNAexpr)) |
|
54 |
+ idx <- intersect(idx1, idx2) |
|
55 |
+ edgeList <- edgeList[idx, ] |
|
56 |
+ |
|
57 |
+ |
|
58 |
+ return( list(mRNAexpr=mRNAexpr, edgeList=edgeList) ) |
|
59 |
+} |
|
60 |
+ |
|