Browse code

Adding FitHiC, SIMLR, SPLINTER, GOpro, DEsubs, statTarget, CancerInSilico, geneXtendeR

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/DEsubs@122029 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 07/10/2016 08:32:34
Showing 43 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,36 @@
1
+Package: DEsubs
2
+Version: 0.99.6
3
+Date: 2015-07-19
4
+Title: DEsubs: an R package for flexible identification of
5
+        differentially expressed subpathways using RNA-seq expression
6
+        experiments
7
+Author: Aristidis G. Vrahatis and Panos Balomenos
8
+Maintainer: Aristidis G. Vrahatis <agvrahatis@upatras.gr> and Panos
9
+        Balomenos <balomenos@upatras.gr>
10
+Description: DEsubs is a network-based systems biology package that
11
+        extracts disease-perturbed subpathways within a pathway network
12
+        as recorded by RNA-seq experiments. It contains an extensive
13
+        and customizable framework covering a broad range of operation
14
+        modes at all stages of the subpathway analysis, enabling a
15
+        case-specific approach. The operation modes refer to the
16
+        pathway network construction and processing, the subpathway
17
+        extraction, visualization and enrichment analysis with regard
18
+        to various biological and pharmacological features. Its
19
+        capabilities render it a tool-guide for both the modeler and
20
+        experimentalist for the identification of more robust
21
+        systems-level biomarkers for complex diseases.
22
+Depends: R (>= 3.3), locfit
23
+SystemRequirements:
24
+License: GPL-3
25
+Repository: Bioconductor
26
+Date/Publication:
27
+NeedsCompilation: no
28
+LazyLoad: yes
29
+Imports: graph, igraph, RBGL, circlize, limma, edgeR, samr, EBSeq,
30
+        NBPSeq, DESeq, stats, grDevices, graphics, pheatmap, utils,
31
+        ggplot2, Matrix, jsonlite, tools, DESeq2, methods
32
+Suggests: RUnit, BiocGenerics, knitr
33
+VignetteBuilder: knitr
34
+biocViews: SystemsBiology, GraphAndNetwork, Pathways, KEGG,
35
+        GeneExpression, NetworkEnrichment, Network, RNASeq,
36
+        DifferentialExpression, Normalization
0 37
new file mode 100644
... ...
@@ -0,0 +1,144 @@
1
+import( 'locfit' )
2
+importFrom( 'DESeq',
3
+                'newCountDataSet',
4
+                'estimateSizeFactors',
5
+                'estimateDispersions',
6
+                'nbinomTest',
7
+                'getVarianceStabilizedData')
8
+importFrom( 'DESeq2',
9
+                'DESeqDataSetFromMatrix',
10
+                'estimateSizeFactors',
11
+                'estimateDispersions',
12
+                'nbinomWaldTest',
13
+                'results')
14
+importFrom( 'samr', 
15
+                'SAMseq' )
16
+importFrom( 'limma', 
17
+                'voom', 
18
+                'lmFit', 
19
+                'eBayes' )
20
+importFrom( 'EBSeq', 
21
+                'MedianNorm',
22
+                'EBTest',
23
+                'GetPPMat')
24
+importFrom( 'NBPSeq',
25
+                'nbp.test')
26
+importFrom( 'edgeR', 
27
+                'DGEList',
28
+                'exactTest', 
29
+                'calcNormFactors', 
30
+                'estimateCommonDisp', 
31
+                'estimateTagwiseDisp')
32
+importFrom( 'graph',
33
+                'ugraph')
34
+importFrom( 'RBGL',
35
+                'kCliques')
36
+importFrom( 'igraph',
37
+                'as_graphnel',
38
+                'graph_from_edgelist',
39
+                'as_data_frame',
40
+                'closeness',
41
+                'hub_score',
42
+                'decompose',
43
+                'cluster_walktrap',
44
+                'cluster_edge_betweenness',
45
+                'cluster_fast_greedy',
46
+                'cluster_leading_eigen',
47
+                'cluster_infomap',
48
+                'cluster_louvain',
49
+                'cliques',
50
+                'max_cliques',
51
+                'coreness',
52
+                'components',
53
+                'as.undirected',
54
+                'betweenness',
55
+                'eccentricity',
56
+                'articulation_points',
57
+                'page_rank',
58
+                'degree',
59
+                'dfs',
60
+                'ego',
61
+                'get.edgelist',
62
+                'get.adjacency',
63
+                'graph.adjacency',
64
+                'graph.adjlist',
65
+                'V',
66
+                'E',
67
+                'all_simple_paths',
68
+                'graph.data.frame',
69
+                'simplify',
70
+                'V<-',
71
+                'E<-',
72
+                'layout.davidson.harel',
73
+                'write.graph')
74
+importFrom( 'Matrix',
75
+                'triu')
76
+importFrom( 'circlize',
77
+                'circos.par',
78
+                'chordDiagram',
79
+                'get.all.sector.index',
80
+                'get.cell.meta.data',
81
+                'circos.text',
82
+                'circos.clear')
83
+importFrom( 'grDevices',
84
+                'pdf',
85
+                'png',
86
+                'svg',
87
+                'rainbow',
88
+                'dev.off',
89
+                'colorRampPalette',
90
+                'dev.copy2pdf')
91
+importFrom( 'graphics',
92
+                'par',
93
+                'barplot',
94
+                'text',
95
+                'plot')
96
+importFrom( 'stats', 
97
+                'deviance', 
98
+                'glm', 
99
+                'hatvalues', 
100
+                'pchisq', 
101
+                'pf',
102
+                'poisson', 
103
+                'qchisq', 
104
+                'qf', 
105
+                'residuals',
106
+                'p.adjust',
107
+                'phyper',
108
+                'model.matrix',
109
+                'cor',
110
+                'predict')
111
+importFrom( 'pheatmap',
112
+                'pheatmap')
113
+importFrom( 'utils',
114
+                'browseURL',
115
+                'head',
116
+                'write.table',
117
+                'read.table',
118
+                'capture.output')
119
+importFrom( 'ggplot2',
120
+                'qplot',
121
+                'theme_classic',
122
+                'geom_point',
123
+                'scale_colour_gradientn',
124
+                'labs',
125
+                'cut_number',
126
+                'theme',
127
+                'element_text')
128
+importFrom( 'jsonlite',
129
+                'toJSON')
130
+importFrom(  'tools',
131
+                'file_ext')
132
+importFrom(  'methods',
133
+                'representation',
134
+                'setClass',
135
+                'setGeneric',
136
+                'setMethod',
137
+                'signature',
138
+                'new')
139
+export( 'DEsubs', 
140
+                'geneVisualization', 
141
+                'organismVisualization',
142
+                'subpathwayToGraph',
143
+                'subpathwayVisualization',
144
+                'subpathwayTypes' )
0 145
new file mode 100644
... ...
@@ -0,0 +1,718 @@
1
+# Gene level measures
2
+#
3
+
4
+geneVisualization <- function ( DEsubs.out, 
5
+                measures.topological='all', 
6
+                measures.functional='all',
7
+                measures.barplot=TRUE, 
8
+                topGenes=10,
9
+                colors.topological=colorRampPalette(c("white", "red"))(100),
10
+                colors.functional=colorRampPalette(c("white", "red"))(100),
11
+                colors.barplot='#C7EDFCFF',
12
+                size.topological=c(5,4),
13
+                size.functional=c(7,4),
14
+                size.barplot=c(5,5),
15
+                outfile.topological='',
16
+                outfile.functional='',
17
+                outfile.barplot='',
18
+                export='pdf',
19
+                verbose=TRUE)    
20
+{
21
+    if ( verbose )
22
+        { message('Generating gene-level view...', appendLF = FALSE) }
23
+    
24
+    edgeList <- DEsubs.out[['edgeList']]
25
+    DEgenes  <- DEsubs.out[['DEgenes']]
26
+    org      <- DEsubs.out[['org']]
27
+    nomen    <- DEsubs.out[['mRNAnomenclature']]
28
+
29
+    if ( length(DEgenes) < topGenes ) { topGenes <- length(DEgenes) }
30
+
31
+    # Keep top DEgenes
32
+    topGenes <- sort(DEgenes)[1:topGenes]
33
+    output   <- list()
34
+
35
+    # Topological analysis
36
+    supportedTopologicalMeasures <- c(  'degree', 
37
+                                        'betweenness', 
38
+                                        'closeness',
39
+                                        'hub_score', 
40
+                                        'eccentricity',
41
+                                        'page_rank')
42
+    # Ontological analysis
43
+    supportedFunctionalMeasures <- .getExternalMeasures()
44
+
45
+    if ( !is.null(measures.topological) )
46
+    {
47
+        if ( 'all' %in% measures.topological )
48
+            { measures.topological <- supportedTopologicalMeasures }
49
+
50
+        failTopo <- measures.topological[!measures.topological %in% 
51
+                                        supportedTopologicalMeasures]
52
+
53
+        if ( length(failTopo) > 0 )
54
+            { message('Topological option(s) ', failTopo, ' not supported.') }
55
+    }
56
+
57
+    if ( !is.null(measures.functional) )
58
+    {
59
+        if ( 'all' %in% measures.functional )
60
+            { measures.functional <- supportedFunctionalMeasures }
61
+
62
+        failFunc <- measures.functional[!measures.functional %in% 
63
+                                        supportedFunctionalMeasures]
64
+
65
+        if ( length(failFunc) > 0 )
66
+            { message('Ontological option(s) ', failFunc, ' not supported.') }
67
+    }
68
+
69
+    # Analysis #1
70
+    topology <- .topologicalMeasures(measures=measures.topological,
71
+                                    edgeList=edgeList, 
72
+                                    topGenes=topGenes, 
73
+                                    org=org,
74
+                                    visualize=TRUE,
75
+                                    colors=colors.topological,
76
+                                    width=size.topological[1],
77
+                                    height=size.topological[2],
78
+                                    outfile=outfile.topological,
79
+                                    export=export)
80
+
81
+    # Analysis #2
82
+    ontology <- .ontologicalMeasures(measures=measures.functional,
83
+                                    topGenes=topGenes, 
84
+                                    org=org,
85
+                                    visualize=TRUE,
86
+                                    colors=colors.functional,
87
+                                    width=size.functional[1],
88
+                                    height=size.functional[2],
89
+                                    outfile=outfile.functional,
90
+                                    export=export)
91
+    # Analysis #3
92
+    if (  measures.barplot )
93
+    {
94
+
95
+        names(topGenes) <- .changeAnnotation(annData=names(topGenes), 
96
+                                    org='hsa', choice='entrezToHGNC')
97
+
98
+        .matrixVisualization( -log10(topGenes), type='barplot', title='',
99
+                            colors=colors.barplot, 
100
+                            width=size.barplot[1],
101
+                            height=size.barplot[2],
102
+                            outfile=outfile.barplot,
103
+                            export=export)
104
+    }
105
+
106
+    output <- c(output, list('measures.topological'=topology) )
107
+    output <- c(output, list('measures.functional'=ontology) )
108
+
109
+    if ( verbose )
110
+        { message('done', appendLF = TRUE) }
111
+
112
+    return( output )
113
+}
114
+
115
+.topologicalMeasures <- function( edgeList, measures, topGenes, org, 
116
+                        visualize, colors, width, height, outfile, export)
117
+{
118
+    if ( is.null(measures) ) 
119
+    { return(NULL) } 
120
+
121
+    supportedMeasures <- c( 'degree', 
122
+                            'betweenness', 
123
+                            'closeness',
124
+                            'hub_score', 
125
+                            'eccentricity',
126
+                            'page_rank')
127
+
128
+    if ( missing (measures) ) { measures <- supportedMeasures }
129
+
130
+    gi   <- graph_from_edgelist(as.matrix(edgeList[, 1:2]))
131
+    topo <- list()
132
+    topologicalHandlers <- .getTopologicalHandlers()
133
+    for ( i in seq_len(length(measures)) )
134
+    {
135
+        res <- sort(topologicalHandlers[[measures[i]]](gi), decreasing=TRUE)
136
+        topo <- c(topo, list(res))
137
+    }
138
+    names(topo) <- measures
139
+
140
+    uGenes  <- unique(as.vector(as.matrix(edgeList[, 1:2])))
141
+    ranking <- matrix(, nrow=length(uGenes), ncol=length(topo))
142
+    for ( i in seq_len(length(topo)) )
143
+    {
144
+        ranking[, i] <- topo[[i]][as.character(uGenes)]
145
+        ranking[, i] <- floor(ranking[, i]/max(ranking[, i])*100)/100
146
+    }
147
+    rownames(ranking) <- uGenes
148
+    colnames(ranking) <- names(topo)
149
+
150
+
151
+    # Return info about top genes
152
+    if ( !missing(topGenes) && !is.null(topGenes) )
153
+    {
154
+        idx     <- which( rownames(ranking) %in% names(topGenes) )
155
+        ranking <- ranking[idx, , drop=FALSE]
156
+    }
157
+
158
+
159
+    if ( visualize )
160
+    {
161
+        rownames(ranking) <- .changeAnnotation( annData=rownames(ranking), 
162
+                                                org='hsa',
163
+                                                choice='entrezToHGNC')
164
+
165
+        .matrixVisualization( dataVis=as.matrix(ranking), 
166
+                                type='heatmap', title='topological',
167
+                                colors=colors,
168
+                                width=width,
169
+                                height=height,
170
+                                outfile=outfile,
171
+                                export=export )
172
+    }
173
+
174
+    return(ranking)
175
+}
176
+
177
+.ontologicalMeasures <- function( measures, topGenes, org, visualize, colors,
178
+                        width, height, outfile, export )
179
+{
180
+    if ( org != 'hsa' ) 
181
+        { return(NULL) } # Homo sapiens only
182
+    if ( is.null(measures) ) 
183
+        { return(NULL) } 
184
+
185
+    supportedTerms <- .getExternalMeasures()
186
+
187
+    if ( missing (measures) ) { measures <- supportedTerms }
188
+
189
+    # Change gene annotation to HGNC for hsa 
190
+    names(topGenes) <- .changeAnnotation(annData=names(topGenes), org='hsa',
191
+                                        choice='entrezToHGNC')
192
+
193
+    ranking <- matrix(, nrow=length(topGenes), ncol=length(measures))
194
+    invalidIdx <- c()
195
+    for ( j in seq_len(length(measures)) )
196
+    {
197
+        targets <- .loadTermData( type=measures[j] )
198
+        if ( is.null(targets) )
199
+        {
200
+            invalidIdx <- c(invalidIdx, j)
201
+            next()
202
+        }
203
+        for ( i in seq_len(length(topGenes)) )
204
+        {
205
+            idx <- targets %in% names(topGenes[i])
206
+            idx <- matrix( idx, nrow=nrow(targets) )*1
207
+            ranking[i, j] <- length(which(rowSums( idx ) > 0))
208
+        }
209
+        if ( max(ranking[, j]) > 0 )
210
+        {
211
+            ranking[, j] <- floor(ranking[, j]/max(ranking[, j])*100)/100
212
+        }
213
+    }
214
+    rownames(ranking) <- names(topGenes)
215
+    colnames(ranking) <- measures
216
+    if ( length(invalidIdx) > 0 ) { ranking <- ranking[, -invalidIdx]}
217
+
218
+    if ( visualize )
219
+    {
220
+        .matrixVisualization( dataVis=(as.matrix(ranking) ), 
221
+                            type='heatmap', title='external.references',
222
+                            colors=colors,
223
+                            width=width,
224
+                            height=height,
225
+                            outfile=outfile,
226
+                            export=export )
227
+    }
228
+
229
+    return( ranking )
230
+}
231
+
232
+
233
+#
234
+# Subpathway level measures
235
+#
236
+
237
+.subLevelMeasures <- function( subpathway, type, topTerms=20 )
238
+{
239
+    supportedTerms <- .getExternalMeasures()
240
+
241
+    if ( missing(type) )
242
+    {
243
+        type <- supportedTerms
244
+    }
245
+
246
+    failType <- type[!type %in% supportedTerms]
247
+    if ( length(failType) > 0 )
248
+    {
249
+        message('Option(s) ', failType, ' not supported.')
250
+    }
251
+
252
+    # Access local Rdata diles
253
+    targets <- .loadTermData( type )
254
+    if (is.null(targets)) { return(NULL) }
255
+
256
+    # Calculate p-values for each class
257
+    N        <- nrow(targets)
258
+    pValues  <- vector( mode='numeric', length=N )
259
+    uTargets <- unique( as.vector( targets ) )
260
+    res <- matrix(, nrow=N, ncol=5)
261
+    for ( i in 1:N )
262
+    {
263
+        termTargetsAll   <- which(targets[i,] != '0')
264
+        termTargetsOfSub <- which(!is.na(match(targets[i,], 
265
+                            subpathway)))
266
+        # Hypergeometric test compares sample to background
267
+        # Success.sample, Success.bgd, Failure.bgd, Sample.size
268
+        A <- length( termTargetsOfSub )
269
+        B <- length( uTargets )
270
+        C <- length( termTargetsAll )
271
+        D <- length( sub )
272
+        pValues[i] <- 1 - phyper(A, C, B-C, D)
273
+        pValues[i] <- floor(pValues[i] * 10000)/1000
274
+    }
275
+    names(pValues) <- rownames(targets)
276
+
277
+
278
+    # Keep significant terms
279
+    idx <- which(pValues < 0.05)
280
+    if ( length(idx) == 0 )
281
+    {
282
+        return(NULL)
283
+    }
284
+    pValues <- pValues[idx]
285
+    targets <- targets[idx, , drop=FALSE]
286
+    # Keep top terms
287
+    idx <- order(pValues)
288
+    pValues <- pValues[idx]
289
+    targets <- targets[idx, , drop=FALSE]
290
+    # Choose most significant classes
291
+    if ( length(targets) == 0 ) { return(NULL) }  
292
+    if ( length(pValues) < topTerms ) 
293
+        { topTerms <- length(pValues) }
294
+    pValues <- pValues[1:topTerms]
295
+    targets <- targets[1:topTerms, , drop=FALSE]
296
+
297
+    # Reduce target matrix with subpathway genes by removing rows and 
298
+    # and columns not containing any subpathway genes
299
+    idx <- targets %in% subpathway
300
+    idx <- matrix( idx, nrow=nrow(targets) )*1
301
+    targets[!idx] <- '0'
302
+
303
+    # Remove rows with no subpathway genes
304
+    idx <- targets == '0'
305
+    if ( length(idx) > 0 )
306
+    {
307
+        idx <- rowSums(idx) != ncol(targets)
308
+        targets <- targets[idx, , drop=FALSE]
309
+    }
310
+    # Remove columns with no subpathway genes
311
+    idx <- targets == '0'
312
+    if ( length(idx) > 0 )
313
+    {
314
+        idx <- colSums(idx) != nrow(targets)
315
+        targets <- targets[, idx, drop=FALSE]
316
+    }
317
+    # Check if any data remains
318
+    if ( length(targets) == 0 )
319
+        { return(NULL) }  
320
+
321
+    # Create edgelist for topmost results (class, subpathway genes)
322
+    edgeList <- matrix(, nrow=length(targets), ncol=3)
323
+    for ( i in seq_len(nrow(targets)) )
324
+    {
325
+        start <- 1 + (i-1)*ncol(targets)
326
+        end   <- i*ncol(targets)
327
+        genesPerTerm <- expand.grid(rownames(targets)[i], targets[i, ] )
328
+        edgeList[start:end, 1:2] <- as.matrix(genesPerTerm)
329
+        edgeList[start:end, 3] <- pValues[rownames(targets)[i]]
330
+    }
331
+    idx <- which(edgeList[, 2] != '0')
332
+    edgeList <- edgeList[idx, , drop=FALSE]
333
+
334
+    return ( edgeList )
335
+}
336
+
337
+
338
+subpathwayVisualization <- function( DEsubs.out, 
339
+                                references='', 
340
+                                submethod, 
341
+                                subname,
342
+                                colors=c('#FF0000FF', '#FF9900FF', '#CCFF00FF',
343
+                                        '#33FF00FF', '#00FF66FF', '#0066FFFF',
344
+                                        '#3300FFFF', '#CC00FFFF', '#FF0099FF',
345
+                                        '#EE82EEFF'), 
346
+                                scale=1, 
347
+                                shuffleColors=FALSE, 
348
+                                outfiles='', 
349
+                                export='pdf', 
350
+                                verbose=TRUE )
351
+{
352
+    if ( 'all' %in% references )
353
+    {
354
+        references <- .getExternalMeasures()
355
+        references <- c(references, 'GO', 'Disease', 'Regulator')
356
+    }
357
+    if ( length(scale) == 1 )
358
+    {
359
+        scale <- rep(scale, length(references))
360
+    }
361
+    if ( length(references) != length(scale) )
362
+    {
363
+        message('Number of references has to be equal to different 
364
+            scaling optios. Setting scale to 1. ')
365
+        scale <- rep(1, length(references))
366
+    }
367
+    if ( (!'' %in% outfiles)  && (length(outfiles) != length(references) ) )
368
+    {
369
+        message('Number of outfiles should be equal to number of references.')
370
+        return(NULL)
371
+    }
372
+    if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) }
373
+
374
+    for ( i in seq_len(length(references)) )
375
+    {
376
+        .subpathwayVisualization(DEsubs.out=DEsubs.out,
377
+                                reference=references[i],
378
+                                submethod=submethod,
379
+                                subname=subname,
380
+                                colors=colors,
381
+                                scale=scale[i],
382
+                                shuffleColors=shuffleColors,
383
+                                outfile=outfiles[i],
384
+                                export=export,
385
+                                verbose=verbose
386
+                                )
387
+    }
388
+}
389
+
390
+.subpathwayVisualization <- function( DEsubs.out, reference, submethod, 
391
+                                    subname, colors, scale, shuffleColors, 
392
+                                    outfile, export, verbose)
393
+{
394
+
395
+    org <- DEsubs.out[['org']]
396
+    edgeList <- DEsubs.out[['edgeList']]
397
+
398
+    if ( org != 'hsa' ) 
399
+        { return(NULL) } # Homo sapiens only
400
+    if ( is.null(submethod) ) 
401
+        { return(NULL) } 
402
+
403
+
404
+    supportedMethods <- subpathwayTypes()
405
+    supportedReferences <- .getExternalMeasures()
406
+    supportedReferences <- c(supportedReferences, 'GO', 'Disease', 'Regulator')
407
+
408
+    if (verbose)
409
+    {
410
+        message('Generating subpathway-level view ( ', reference,' )...', 
411
+                                                        appendLF = FALSE)
412
+    }
413
+
414
+    unsupportedOptions <- submethod[!submethod %in% supportedMethods]
415
+    if ( length(unsupportedOptions) > 0 )
416
+    {
417
+        message('Option(s) ', unsupportedOptions, ' not supported.')
418
+        return(NULL)
419
+    }
420
+    unsupportedReferences <- reference[!reference %in% supportedReferences]
421
+    if ( length(unsupportedReferences) > 0 )
422
+    {
423
+        message('Reference(s) ', unsupportedReferences, ' not supported.')
424
+        return(NULL)
425
+    }
426
+
427
+
428
+    # Subpathway information extraction
429
+    submethodName <- paste0('subAnalysis.', submethod)
430
+    subpathway.nodelist <- DEsubs.out[[submethodName]][[subname]]
431
+
432
+    # Subpathway (nodelist) to edgelist
433
+    idx1 <- which(edgeList[, 1] %in% subpathway.nodelist)
434
+    idx2 <- which(edgeList[, 2] %in% subpathway.nodelist)
435
+    idx <- intersect(idx1, idx2)
436
+    subpathway.edgelist <- edgeList[idx, ]
437
+
438
+    # Change annotation from entrez to HGNC
439
+    subpathway.nodelist <- .changeAnnotation(annData=subpathway.nodelist, 
440
+                        org='hsa', choice='entrezToHGNC')
441
+    subpathway.nodelist <- unname(subpathway.nodelist)
442
+
443
+
444
+    # Special care for double and triple references
445
+    references <- reference
446
+    top <- 30
447
+    agap <- 0; bgap <- 0; cgap <- 50
448
+    if ( reference %in% 'GO' )
449
+    {
450
+        references <- c('GO_bp', 
451
+                        'GO_cc', 
452
+                        'GO_mf')
453
+        agap <- 20; bgap <- 20; cgap <- 50
454
+    }
455
+    if ( reference %in% 'Disease' )
456
+    {
457
+        references <- c('Disease_OMIM', 'Disease_GAD')
458
+        agap <- 20; bgap <- 0; cgap <- 50
459
+    }
460
+    if ( reference %in% 'Regulator' )
461
+    {
462
+        references <- c('miRNA', 'TF')
463
+        agap <- 20; bgap <- 0; cgap <- 50
464
+    }
465
+    top <-  floor(top/length(references))
466
+
467
+    edgeLists <- list()
468
+    for ( i in seq_len(length(references)) )
469
+    {
470
+        subpathway.edgelist <- .subLevelMeasures(
471
+                            subpathway=subpathway.nodelist, 
472
+                            type=references[i], 
473
+                            topTerms=top)
474
+        if ( !is.null(subpathway.edgelist) )
475
+        {
476
+            if ( top > nrow(subpathway.edgelist) )
477
+                { top <- nrow(subpathway.edgelist) }
478
+            subpathway.edgelist <- subpathway.edgelist[1:top, , drop=FALSE]
479
+
480
+            R <- list(subpathway.edgelist)
481
+            names(R) <- references[i]
482
+            edgeLists <- c(edgeLists,  R)
483
+        }
484
+    }
485
+
486
+    if ( length(edgeLists) == 0 ) 
487
+    { 
488
+        if (verbose) { message('done.') }
489
+        return(NULL) 
490
+    }
491
+
492
+    # Merge rows and columns indepedently for each edgelist.
493
+    rowTerms <- c(); colTerms <- c()
494
+    for ( i in seq_len(length(edgeLists)) )
495
+    {
496
+        if ( is.null(edgeLists[[i]]) ) { next() }
497
+        rowTerms <- unique(c(rowTerms, edgeLists[[i]][, 1]))
498
+        colTerms <- unique(c(colTerms, edgeLists[[i]][, 2]))
499
+    }
500
+
501
+
502
+    edgeList <- do.call(rbind, edgeLists)
503
+    adjmat   <- matrix(0, nrow=length(rowTerms), ncol=length(colTerms))
504
+    rownames(adjmat) <- rowTerms
505
+    colnames(adjmat) <- colTerms
506
+    for ( i in seq_len(nrow(edgeList)) )
507
+    {
508
+        adjmat[ edgeList[i,1], edgeList[i,2] ] <- 1
509
+    }
510
+
511
+    if ( shuffleColors  )
512
+        { colors <- sample(colors, nrow(adjmat), replace=TRUE) }
513
+    if ( !shuffleColors  )
514
+        { colors <- rep(colors, length.out=nrow(adjmat)) }
515
+
516
+
517
+    # The user has supplied a custom directory
518
+    if ( (outfile != '') && ('pdf' %in% export) ) 
519
+    { 
520
+        .dir.create.rec(outfile) 
521
+    }
522
+    # No custom directory has been supplied
523
+    if ( (outfile == '') && ('pdf' %in% export) )
524
+    {
525
+        dir.create(cache[['outDir']], showWarnings=FALSE, recursive=TRUE)
526
+        outfile <- paste0(cache[['outDir']] , '//circos_', reference ,'.pdf')
527
+    }
528
+
529
+    .doCirclize( mat=adjmat, 
530
+                colors=colors,
531
+                agap=agap,
532
+                bgap=bgap,
533
+                cgap=cgap,
534
+                degree=250,
535
+                a=round(10/scale)/10,
536
+                b=round(10/scale)/10,
537
+                outfile=outfile,
538
+                export=export)
539
+
540
+    if (verbose) { message('done.') }
541
+}
542
+
543
+
544
+#
545
+# Organism level measures
546
+#
547
+organismVisualization <- function( DEsubs.out, 
548
+                            references='', 
549
+                            topSubs=10, 
550
+                            topTerms=20, 
551
+                            colors=colorRampPalette(c("white", "red"))(100), 
552
+                            export='pdf', 
553
+                            width=7, 
554
+                            height=6, 
555
+                            outfiles='', 
556
+                            verbose=TRUE )
557
+{
558
+    # Homo sapiens only
559
+    org <- DEsubs.out[['org']]
560
+    if (org != 'hsa') { return(NULL) }
561
+
562
+    if ( 'all' %in% references )
563
+    {
564
+        references <- .getExternalMeasures()
565
+    }
566
+
567
+    if ( (!'' %in% outfiles)  && (length(outfiles) != length(references) ) )
568
+    {
569
+        message('Number of outfiles should be equal to number of references.')
570
+        return(NULL)
571
+    }
572
+    if ( '' %in% outfiles ) { outfiles <- rep('', length(references) ) }
573
+
574
+    for ( i in seq_len(length(references)) )
575
+    {
576
+        res <- .organismVisualization(DEsubs.out=DEsubs.out,
577
+                                    references=references[i],
578
+                                    topSubs=topSubs,
579
+                                    topTerms=topTerms,
580
+                                    colors=colors,
581
+                                    export=export,
582
+                                    width=width,
583
+                                    height=height,
584
+                                    outfile=outfiles[i],
585
+                                    verbose=verbose)
586
+    }
587
+
588
+
589
+    return(invisible())
590
+}
591
+
592
+.organismVisualization <- function( DEsubs.out, references, topSubs, topTerms,
593
+                                    colors, export, width, height, 
594
+                                    outfile, verbose )
595
+{
596
+
597
+    org <- DEsubs.out[['org']]
598
+    type <- references
599
+    supportedTerms <- .getExternalMeasures()
600
+
601
+    if (verbose)
602
+    {
603
+        message(paste0('Generating organism-level view ( ', type , ' )...'), 
604
+                appendLF = FALSE)
605
+    }
606
+
607
+    failType <- type[!type %in% supportedTerms]
608
+    if ( length(failType) > 0 )
609
+    { 
610
+        message('Option(s) ', failType, ' not supported.')
611
+        return(invisible())
612
+    }
613
+
614
+    subpathways <- DEsubs.out[[5]]
615
+    termsPerSub <- list()
616
+    pValuesPerTermPerSub <- list()
617
+    pathNames <- c()
618
+
619
+
620
+    top <- c(topSubs, topTerms)
621
+    names(top) <- c('subpathways', 'terms') 
622
+    
623
+    if ( length(subpathways) < top['subpathways'] ) 
624
+        { top['subpathways'] <- length(subpathways) }
625
+
626
+    subpathways <- subpathways[1:top['subpathways']]
627
+    subpathways <- subpathways[!is.na(subpathways)]
628
+    offset <- 0.001
629
+
630
+    # Find most significant terms for each subpathway
631
+    for ( i in seq_len(length(subpathways)) )
632
+    {
633
+        sub <- unname(.changeAnnotation(annData=subpathways[[i]], org=org,
634
+                    choice='entrezToHGNC'))
635
+        edgeList <- .subLevelMeasures(subpathway=sub, type=type, 
636
+                    topTerms=top['subpathways'])
637
+        if ( !is.null(edgeList) )
638
+        {
639
+            termsPerSub[[i]] <- matrix( edgeList[, 1], nrow=1 )
640
+            pval <- matrix( as.numeric(edgeList[, 3]) + offset, nrow=1 )
641
+            pValuesPerTermPerSub[[i]] <- pval
642
+            pathNames <- c(pathNames, names(subpathways)[i] )
643
+        }
644
+    }
645
+
646
+    if ( length(termsPerSub) == 0) 
647
+    {
648
+        message('done', appendLF = TRUE) 
649
+        return(NULL) 
650
+    }
651
+    
652
+    # Convert lists to adjacency matrices
653
+    names(termsPerSub) <- pathNames
654
+    termsPerSub <- .unlistToMatrix(.fillMatrixList( termsPerSub ) )
655
+    names(pValuesPerTermPerSub) <- pathNames
656
+    pValuesPerTermPerSub <- .fillMatrixList( pValuesPerTermPerSub )
657
+    pValuesPerTermPerSub <- .unlistToMatrix( pValuesPerTermPerSub )
658
+
659
+    # Find topmost terms amongst the terms for all subpathways
660
+    uniqueTerms <- unique(as.vector(termsPerSub))
661
+    uniqueTerms <- uniqueTerms[uniqueTerms != '0']
662
+    counts <- vector(mode='numeric', length=length(uniqueTerms))
663
+    for ( i in seq_len(length(uniqueTerms)) )
664
+    {
665
+        counts[i] <- length(which(as.vector(termsPerSub) == uniqueTerms[i]))
666
+    }
667
+    names(counts) <- uniqueTerms
668
+    topTerms <- names(head(sort(counts, decreasing=TRUE), top['terms']))
669
+
670
+    # Keep only topmost terms for each subpathway
671
+    idx <- termsPerSub %in% topTerms
672
+    if ( length(idx) > 0 ) { termsPerSub[!idx] <- NA }
673
+
674
+    # Create an edgelist of topmost terms for all subpathways
675
+    termsPerSub.edgeList <- matrix(, nrow=length(termsPerSub), ncol=3)
676
+    for ( i in seq_len(nrow(termsPerSub)) )
677
+    {
678
+        if ( is.na(rownames(termsPerSub)[i]) ) { next() }
679
+
680
+        start <- 1 + (i-1)*ncol(termsPerSub)
681
+        end   <- i*ncol(termsPerSub)
682
+        data1 <- expand.grid( rownames(termsPerSub)[i], termsPerSub[i, ] )
683
+        data2 <- expand.grid(   rownames(termsPerSub)[i], 
684
+                                pValuesPerTermPerSub[i, ] - offset )
685
+
686
+        data.all <- cbind(data1, data2[, 2])
687
+        termsPerSub.edgeList[start:end, ] <- as.matrix(data.all)        
688
+    }
689
+
690
+    # Remove NA's
691
+    idx <- which(is.na(termsPerSub.edgeList[, 2]))
692
+    if ( length(idx) > 0 ) 
693
+    { 
694
+        termsPerSub.edgeList <- termsPerSub.edgeList[-idx, , drop=FALSE] 
695
+    }
696
+
697
+    termsPerSub.df <- as.data.frame(termsPerSub.edgeList, 
698
+                                                    stringsAsFactors=FALSE)
699
+    colnames(termsPerSub.df) <-  c('Subpathway', 'Term', 'pValue')
700
+
701
+    termsPerSub.df[, 'pValue'] <- as.numeric(termsPerSub.df[, 'pValue'])
702
+
703
+
704
+    .matrixVisualization(dataVis=termsPerSub.df, type='dotplot', 
705
+                        title=c('Subpathway', type, 'Q-values'),
706
+                        colors=colors,
707
+                        width=width, height=height,
708
+                        outfile=outfile,
709
+                        export=export)
710
+
711
+    if (verbose)
712
+        { message('done', appendLF = TRUE) }
713
+    
714
+
715
+    return(termsPerSub.edgeList)
716
+}
717
+
718
+
0 719
new file mode 100644
... ...
@@ -0,0 +1,281 @@
1
+DEsubs <- function( org, mRNAexpr, mRNAnomenclature, pathways, 
2
+                    DEtool, DEpar, CORtool, CORpar, subpathwayType,
3
+                    rankedList=NULL, verbose=TRUE)
4
+{
5
+    # Create the adjacency matrix
6
+    dataNet <- .constructNetwork(org=org, 
7
+                            mRNAexpr=mRNAexpr, 
8
+                            mRNAnomenclature=mRNAnomenclature, 
9
+                            pathways=pathways)
10
+    mRNAexpr <- dataNet[['mRNAexpr']]
11
+    edgeList <- dataNet[['edgeList']]
12
+
13
+    # Filter the nodes and the edges of the organism's pathways network
14
+    lens     <- suppressWarnings(split(1:ncol(mRNAexpr), 1:2))
15
+    lens     <- unname(sapply(lens, function(x) { length(x) }))
16
+    net      <- .pruneNetwork(  edgeList=edgeList,
17
+                                mRNAexpr=mRNAexpr, 
18
+                                DEGchoice=DEtool, 
19
+                                DEGthresh=DEpar, 
20
+                                classes=c(rep(1, lens[1]), rep(2, lens[2]) ),
21
+                                corr_threshold=CORpar, org=org,
22
+                                CORtool=CORtool,
23
+                                rankedList=rankedList,
24
+                                verbose=verbose)
25
+
26
+    edgeList <- net[['edgeList']]
27
+    DEgenes  <- net[['DEgenes']]
28
+    output   <- list('org'=org, 'mRNAnomenclature'=mRNAnomenclature,
29
+                    'edgeList'=edgeList, 'DEgenes'=DEgenes)
30
+
31
+    # Choose specific subpathways
32
+    subTypes <- subpathwayTypes(grouping=subpathwayType)
33
+
34
+    # Subpathway analysis
35
+    for (subType in subTypes)
36
+    {
37
+        subs <- .subpathwayAnalysis( edgeList=edgeList, 
38
+                                    method=subType,
39
+                                    DEgenes=DEgenes,
40
+                                    org=org,
41
+                                    verbose=verbose )
42
+        output <- c(output, subs)
43
+    }
44
+
45
+    return( output )
46
+}
47
+
48
+
49
+.DEanalysis <- function( count.matrix, DEGchoice, classes )
50
+{
51
+    # 
52
+    # Differential expression analysis using various DE analysis tools from
53
+    # 
54
+    # Soneson,C. and Delorenzi,M. (2013) A comparison of methods for 
55
+    # differential expression analysis of RNA-seq data. BMC bioinformatics, 
56
+    # 14(1), 1.
57
+    #
58
+
59
+    if ( missing(count.matrix) ) { message('Please supply a matrix.') }
60
+    if ( missing(classes) )      { message('Please supply the classes.') }
61
+    if ( missing(DEGchoice) )    { message('Please supply a type.') }
62
+
63
+    supportedMethods <- c('edgeR', 'DESeq', 'EBSeq', 'samr', 'NBPSeq', 
64
+                            'voom+limma', 'vst+limma', 'TSPM')
65
+
66
+    if ( DEGchoice == 'edgeR' )
67
+    {
68
+        # run edgeR
69
+        edgeR.dgelist <- DGEList(counts=count.matrix,  group=factor(classes))
70
+        edgeR.dgelist <- calcNormFactors(edgeR.dgelist,  method="TMM")
71
+        edgeR.dgelist <- estimateCommonDisp(edgeR.dgelist)
72
+        edgeR.dgelist <- estimateTagwiseDisp(edgeR.dgelist, trend="movingave")
73
+        edgeR.test    <- exactTest(edgeR.dgelist)
74
+        edgeR.pvalues <- edgeR.test[['table']][['PValue']]
75
+        genes             <- rownames(edgeR.test[['table']])
76
+        edgeR.adjpvalues  <- p.adjust(edgeR.pvalues,  method="BH")
77
+        adjpvalues        <- edgeR.adjpvalues
78
+        names(adjpvalues) <- genes
79
+
80
+        return(adjpvalues)
81
+    }
82
+    if ( DEGchoice == 'DESeq' )
83
+    {
84
+        # run DESeq
85
+        DESeq.cds <- DESeq::newCountDataSet(countData=count.matrix, 
86
+                                    conditions=factor(classes))
87
+        DESeq.cds <- DESeq::estimateSizeFactors(DESeq.cds)
88
+        DESeq.cds <- DESeq::estimateDispersions(DESeq.cds, 
89
+                            sharingMode="maximum", method="pooled", 
90
+                            fitType="local")
91
+        DESeq.test        <- nbinomTest(DESeq.cds, "1", "2")
92
+        DESeq.pvalues     <- DESeq.test[['pval']]
93
+        genes             <- DESeq.test[['id']]
94
+        DESeq.adjpvalues  <- p.adjust(DESeq.pvalues, method="BH")
95
+        adjpvalues        <- DESeq.adjpvalues
96
+        names(adjpvalues) <- genes
97
+        
98
+        return(adjpvalues)
99
+    }
100
+    if ( DEGchoice == 'voom+limma' )
101
+    {
102
+        # voom+limma
103
+        nf                <- calcNormFactors(count.matrix,  method="TMM")
104
+        voom.data         <- voom(  count.matrix, 
105
+                                    design=model.matrix(~factor(classes)), 
106
+                                    lib.size=colSums(count.matrix)*nf)
107
+        voom.data[['genes']] <- rownames(count.matrix)
108
+        voom.fitlimma        <- lmFit( voom.data, 
109
+                                    design=model.matrix(~factor(classes)))
110
+        voom.fitbayes     <- eBayes(voom.fitlimma)
111
+        voom.pvalues      <- voom.fitbayes[['p.value']][, 2]
112
+        voom.adjpvalues   <- p.adjust(voom.pvalues, method="BH")
113
+        voom.genes        <- rownames(voom.fitbayes[['p.value']])
114
+        adjpvalues        <- voom.adjpvalues
115
+        names(adjpvalues) <- voom.genes
116
+
117
+        return(adjpvalues)
118
+    }
119
+    if ( DEGchoice == 'samr' )
120
+    {
121
+        # samr
122
+        sink( tempfile() ) 
123
+        SAMseq.test <- suppressMessages(SAMseq(count.matrix, classes, 
124
+                            resp.type="Two class unpaired", 
125
+                            geneid = rownames(count.matrix), 
126
+                            genenames = rownames(count.matrix),
127
+                            nperms = 100, nresamp = 20, fdr.output = 1))
128
+        SAMseq.result.table <- rbind( 
129
+                            SAMseq.test[['siggenes.table']][['genes.up']], 
130
+                            SAMseq.test[['siggenes.table']][['genes.lo']])
131
+        SAMseq.score        <- rep(0,  nrow(count.matrix))
132
+        idx                 <- match(SAMseq.result.table[,1], 
133
+                                    rownames(count.matrix))
134
+        SAMseq.score[idx]   <- as.numeric(SAMseq.result.table[,3])
135
+        SAMseq.FDR          <- rep(1, nrow(count.matrix))
136
+        idx                 <- match(SAMseq.result.table[,1], 
137
+                                    rownames(count.matrix)) 
138
+        SAMseq.FDR[idx]     <- as.numeric(SAMseq.result.table[,5])/100
139
+        adjpvalues          <- SAMseq.FDR
140
+        genes               <- SAMseq.result.table[, 'Gene ID']
141
+        names(adjpvalues)   <- genes
142
+
143
+        sink()
144
+
145
+        return(adjpvalues)
146
+    }
147
+    if ( DEGchoice == 'EBSeq' )
148
+    {
149
+        # run EBSeq
150
+        sizes       <-  MedianNorm(count.matrix)
151
+        EBSeq.test  <-  suppressMessages( EBTest(Data=count.matrix, 
152
+                        Conditions=factor(classes), sizeFactors=sizes,  
153
+                        maxround=10))
154
+
155
+        EBSeq.ppmat             <-  GetPPMat(EBSeq.test)
156
+        EBSeq.probabilities.DE  <-  EBSeq.ppmat[, "PPDE"]
157
+        EBSeq.lFDR  <-  1 - EBSeq.ppmat[, "PPDE"]
158
+        EBSeq.FDR   <-  rep(NA,  length(EBSeq.lFDR))
159
+        names(EBSeq.FDR) <- names(EBSeq.lFDR)
160
+        for  (i  in  seq_len(length(EBSeq.lFDR)) )  
161
+        {
162
+            idx          <- which(EBSeq.lFDR <= EBSeq.lFDR[i])
163
+            EBSeq.FDR[i] <- mean(EBSeq.lFDR[idx])
164
+        }
165
+        adjpvalues <- EBSeq.FDR
166
+
167
+        return(adjpvalues)
168
+    }
169
+    if ( DEGchoice == 'vst+limma' )
170
+    {
171
+        # vst+limma
172
+        DESeq.cds  <- newCountDataSet(  countData=count.matrix, 
173
+                                        conditions=factor(classes))
174
+        DESeq.cds  <- estimateSizeFactors(DESeq.cds)
175
+        DESeq.cds  <- estimateDispersions(  DESeq.cds,  
176
+                                            method="blind", fitType="local")
177
+        DESeq.vst  <- getVarianceStabilizedData(DESeq.cds)
178
+        DESeq.vst.fitlimma   <- lmFit(  DESeq.vst,
179
+                                        design=model.matrix(~factor(classes)))
180
+        DESeq.vst.fitbayes   <- eBayes(DESeq.vst.fitlimma)
181
+        DESeq.vst.pvalues    <- DESeq.vst.fitbayes[['p.value']][, 2]
182
+        genes                <- rownames(DESeq.vst.fitbayes[['p.value']])
183
+        DESeq.vst.adjpvalues <- p.adjust(DESeq.vst.pvalues, method="BH")
184
+        adjpvalues           <- DESeq.vst.adjpvalues 
185
+        names(adjpvalues)    <- genes
186
+
187
+        return(adjpvalues)
188
+    }
189
+    if ( DEGchoice == 'NBPSeq' )
190
+    {
191
+        # NBPSeq
192
+        NBPSeq.dgelist      <- DGEList( counts=count.matrix, 
193
+                                        group=factor(classes))
194
+        NBPSeq.dgelist      <- calcNormFactors(NBPSeq.dgelist, method="TMM")
195
+        NBPSeq.norm.factors <- as.vector(
196
+                            NBPSeq.dgelist[['samples']][['norm.factors']])
197
+        
198
+        capture.output(NBPSeq.test <- nbp.test(counts=count.matrix, 
199
+            grp.ids=classes, grp1=1,grp2=2, norm.factors=NBPSeq.norm.factors))
200
+        NBPSeq.pvalues    <- NBPSeq.test[['p.values']]
201
+        NBPSeq.adjpvalues <- NBPSeq.test[['q.values']]
202
+        adjpvalues        <- NBPSeq.adjpvalues 
203
+        names(adjpvalues) <- rownames(NBPSeq.test[['counts']])
204
+
205
+        return(adjpvalues)
206
+    }
207
+    if ( DEGchoice == 'TSPM' )
208
+    {
209
+        TSPM.dgelist <- DGEList(counts = count.matrix, group = factor(classes))
210
+        TSPM.dgelist  <- calcNormFactors(TSPM.dgelist, method = "TMM")
211
+        v1 <- as.vector(TSPM.dgelist[['samples']][['norm.factors']])
212
+        v2 <- as.vector(TSPM.dgelist[['samples']][['lib.size']])
213
+        norm.lib.sizes  <- v1 * v2 
214
+        TSPM.test <- TSPM(counts = count.matrix, x1 = factor(classes), 
215
+                    x0 = rep(1, length(classes)), lib.size = norm.lib.sizes)
216
+        TSPM.pvalues    <- TSPM.test[['pvalues']]
217
+        TSPM.adjpvalues <- TSPM.test[['padj']]
218
+        adjpvalues      <- TSPM.adjpvalues
219
+        genes           <- rownames(TSPM.dgelist[['counts']])
220
+        names(adjpvalues) <- genes
221
+
222
+        return(adjpvalues)
223
+    }
224
+    if ( DEGchoice == 'DESeq2' )
225
+    {
226
+        # run DESeq2
227
+        sink(tempfile())
228
+        DESeq2.ds <- DESeq2::DESeqDataSetFromMatrix(countData = count.matrix,
229
+                        colData = data.frame(condition = factor(classes)), 
230
+                        design = ~ condition)
231
+        DESeq2.ds <- DESeq2::estimateSizeFactors( DESeq2.ds )
232
+        DESeq2.ds <- DESeq2::estimateDispersions( DESeq2.ds, fitType="local")
233
+        DESeq2.ds <- DESeq2::nbinomWaldTest( DESeq2.ds )
234
+        DESeq2.results <- DESeq2::results(DESeq2.ds )
235
+        adjpvalues <- p.adjust(DESeq2.results[['pvalue']], method="BH")
236
+        names(adjpvalues) <- rownames(count.matrix)
237
+        sink()
238
+
239
+        return(adjpvalues)
240
+    }
241
+    if ( DEGchoice == 'vst2+limma' )
242
+    {
243
+        # vst(DESeq2)+limma
244
+        
245
+        sink(tempfile())
246
+        DESeq2.ds <- DESeq2::DESeqDataSetFromMatrix(countData = count.matrix,
247
+                        colData = data.frame(condition = factor(classes)), 
248
+                        design = ~ condition)
249
+        DESeq2.ds <- DESeq2::estimateSizeFactors( DESeq2.ds )
250
+        DESeq2.ds <- DESeq2::estimateDispersions( DESeq2.ds, fitType="local")
251
+        DESeq2.vst <- DESeq2::getVarianceStabilizedData( DESeq2.ds )
252
+
253
+        DESeq2.vst.fitlimma <- lmFit(  DESeq2.vst,
254
+                                        design=model.matrix(~factor(classes)))
255
+        DESeq2.vst.fitbayes <- eBayes(DESeq2.vst.fitlimma)
256
+        DESeq2.vst.pvalues  <- DESeq2.vst.fitbayes[['p.value']][, 2]
257
+        genes               <- rownames(DESeq2.vst.fitbayes[['p.value']])
258
+        DESeq2.vst.adjpvalues <- p.adjust(DESeq2.vst.pvalues, method="BH")
259
+        adjpvalues          <- DESeq2.vst.adjpvalues 
260
+        names(adjpvalues)   <- genes
261
+        sink()
262
+
263
+        return(adjpvalues)
264
+    }
265
+    if  ( !is.null(DEGchoice) )
266
+    {
267
+        unsupportedOptions <- DEGchoice[!DEGchoice %in% supportedMethods]
268
+        if ( length(unsupportedOptions) > 0 )
269
+        {
270
+            message('Option ', unsupportedOptions, ' not supported.')
271
+            message('Supported options are ', 
272
+                        paste0(supportedMethods, collapse=', '), '.') 
273
+            return( NULL ) 
274
+        }
275
+    }
276
+
277
+
278
+
279
+    return( NULL )
280
+}
281
+
0 282
new file mode 100644
... ...
@@ -0,0 +1,133 @@
1
+##-------------------------------------------------------------------
2
+##     Name: TSPM.R
3
+##      R code for the paper by Paul L. Auer and R.W. Doerge:
4
+##     "A Two-Stage Poisson Model for Testing RNA-Seq Data"
5
+##     Date: February 2011
6
+##     Contact: Paul Auer         plivermo@fhcrc.org
7
+##         R.W. Doerge         doerge@purdue.edu
8
+
9
+
10
+##     Example:
11
+##     counts <- matrix(0, nrow=1000, ncol=10)
12
+##     for(i in 1:1000){
13
+##        lambda <- rpois(n=1, lambda=10)
14
+##        counts[i,] <- rpois(n=10, lambda=lambda)
15
+##     }
16
+##     x1 <- gl(n=2, k=5, labels=c("T", "C"))
17
+##     x0 <- rep(1, times=10)
18
+##     lib.size <- apply(counts,2,sum)
19
+##    result <- TSPM(counts, x1, x0, lib.size)
20
+##---------------------------------------------------------------------
21
+
22
+
23
+#######################################################################
24
+###### The TSPM function ##############################################
25
+#######################################################################
26
+
27
+
28
+TSPM <- function(counts, x1, x0, lib.size, alpha.wh=0.05){
29
+
30
+## Input:
31
+#counts:   a matrix of RNA-Seq gene counts (genes are rows, samples are 
32
+#          columns)
33
+#x1:       a vector of treatment group factors (under the alternative 
34
+#          hypothesis)
35
+#x0:       a vector of treatment group factors (under the null hypothesis)
36
+#lib.size: a vector of RNA-Seq library sizes. This could simply be obtained
37
+#          by specifying lib.size <- apply(counts,2,sum). It may also be any 
38
+#          other appropriate scaling factor.
39
+#alpha.wh: the significance threshold to use for deciding whether a gene is 
40
+#          overdispersed.
41
+#          Defaults to 0.05.
42
+
43
+
44
+## Output:
45
+#log.fold.change:     a vector containing the estimated log fold changes for 
46
+#                     each gene
47
+#pvalues:             a vector containing the raw p-values testing differential 
48
+#                     expression for each gene.
49
+#index.over.disp:     a vector of integer values containing the indices of the 
50
+#                     over-dispersed genes.
51
+#index.not.over.disp: a vector of integer values containing the indices of the 
52
+#                     non-over-dispersed genes.
53
+#padj:                a vector containing the p-values after adjusting for 
54
+#                     multiple testing using the 
55
+#                     method of Benjamini-Hochberg
56
+
57
+
58
+######## The main loop that fits the GLMs to each gene #####################
59
+
60
+### Initializing model parameters ####
61
+n <- dim(counts)[1]
62
+per.gene.disp <- NULL
63
+LRT <- NULL
64
+score.test <- NULL
65
+LFC <- NULL
66
+
67
+###### Fitting the GLMs for each gene #################
68
+    for(i in 1:n){
69
+        ### Fit full and reduced models ###
70
+        model.1 <- glm(as.numeric(counts[i,]) ~ x1, offset=log(lib.size), 
71
+                    family=poisson)
72
+        model.0 <- glm(as.numeric(counts[i,]) ~ x0, offset=log(lib.size), 
73
+                    family=poisson)
74
+
75
+        ### Obtain diagonals of Hat matrix from the full model fit ###
76
+        hats <- hatvalues(model.1)
77
+
78
+        ### Obtain Pearson overdispersion estimate ####
79
+        per.gene.disp[i] <- sum(residuals(model.1, 
80
+                            type="pearson")^2)/model.1$df.residual
81
+
82
+        ### Obtain Likelihood ratio statistic ####
83
+        LRT[i] <- deviance(model.0)-deviance(model.1)
84
+
85
+        ### Obtain score test statistic ####
86
+        score.test[i] <- 1/(2*length(counts[i,])) * sum(residuals(model.1, 
87
+            type="pearson")^2 - ((counts[i,] - 
88
+            hats*model.1$fitted.values)/model.1$fitted.values))^2
89
+
90
+        ### Obtain the estimated log fold change ###
91
+        LFC[i] <- -model.1$coef[2]
92
+    }
93
+
94
+## Initialize parameters for Working-Hotelling bands around the score TSs ###
95
+qchi <- qchisq(df=1, (1:n-0.5)/n)
96
+MSE <- 2
97
+UL <- NULL
98
+
99
+#### Obtain the upper boundary of the WH bands ############################
100
+xbar <- mean(qchi)
101
+bottom <- sum((qchi-xbar)^2)
102
+top <- (qchi-xbar)^2
103
+s <- sqrt(MSE*(1/n) + (top/bottom))
104
+W <- sqrt(2*qf(df1=1, df2=n-1, p=1-(alpha.wh/n)))
105
+UL <- pmax(qchi + W*s,1)
106
+
107
+###### Obtain the indices of the over-dispersed and not-over-dispersed genes, 
108
+# respectively ##########
109
+
110
+cutoff <- min(which(sort(score.test)-UL > 0))
111
+temp <- cutoff-1 + seq(cutoff:length(score.test))
112
+over.disp <- which(score.test %in% sort(score.test)[temp])
113
+not.over.disp <- setdiff(1:length(score.test), over.disp)
114
+
115
+###### Compute p-values ####################################
116
+p.f <- pf(LRT[over.disp]/per.gene.disp[over.disp], df1=1, 
117
+        df2=model.1$df.residual, lower.tail=FALSE)
118
+p.chi <- pchisq(LRT[not.over.disp], df=1, lower.tail=FALSE)
119
+p <- NULL
120
+p[over.disp] <- p.f
121
+p[not.over.disp] <- p.chi
122
+
123
+##### Adjust the p-values using the B-H method ####################
124
+p.bh.f <- p.adjust(p.f, method="BH")
125
+p.bh.chi <- p.adjust(p.chi, method="BH")
126
+final.p.bh.tagwise <- NULL
127
+final.p.bh.tagwise[over.disp] <- p.bh.f
128
+final.p.bh.tagwise[not.over.disp] <- p.bh.chi
129
+
130
+### Output ###
131
+list(log.fold.change=LFC, pvalues=p, index.over.disp=over.disp, 
132
+        index.not.over.disp=not.over.disp, padj=final.p.bh.tagwise)
133
+}
0 134
new file mode 100644
... ...
@@ -0,0 +1,210 @@
1
+.constructNetwork <- function( org, mRNAexpr, mRNAnomenclature='entrezgene', 
2
+                                pathways='all')
3
+{
4
+    # If a filename is given, read a text file from the 'User' directory.
5
+    if ( class(mRNAexpr) == 'character' )
6
+    {
7
+        dir <- paste0(cache[['baseDir']], '//User') 
8
+        mRNAexpr <- readLines(paste(dir, mRNAexpr, sep='//'))
9
+        mRNAexpr <- strsplit(mRNAexpr, '\t')
10
+        mRNAexpr <- do.call('rbind', mRNAexpr)
11
+        rownames(mRNAexpr) <- mRNAexpr[, 1]
12
+        mRNAexpr <- mRNAexpr[, -1]
13
+        class(mRNAexpr) <- 'numeric' 
14
+    }
15
+
16
+    # Change nomenclature if necessary
17
+
18
+    if ( mRNAnomenclature != 'entrezgene' ) 
19
+    {
20
+        file <- 'extdata//Data//libraryEntrezToExternalNomenclature.RData'
21
+        file  <- system.file(file, package='DEsubs')
22
+        load(file, e <- new.env())
23
+        orgLib <- e[['libraryEntrezToExternalNomenclature']][[org]]
24
+        lib <- orgLib[[mRNAnomenclature]]
25
+
26
+        # Keep rows with entrez ids
27
+        idx <- which( as.character(rownames(mRNAexpr)) %in% lib[, 2] )
28
+        mRNAexpr <- mRNAexpr[idx, ]
29
+        
30
+        # Convert to entrez ids
31
+        xlib <- lib[, 1]
32
+        names(xlib) <- lib[, 2]
33
+        rownames(mRNAexpr) <- xlib[rownames(mRNAexpr)]        
34
+    }
35
+
36
+    # Create pathway graph
37
+    file  <- system.file('extdata//Data//edgeLists.RData', package='DEsubs')
38
+    load(file, e <- new.env())
39
+    edgeList <- e[['edgeLists']][[org]]
40
+
41
+    # Filter edgelist according to pathway type
42
+    idx <- sapply(edgeList, is.factor)
43
+    edgeList[idx] <- lapply(edgeList[idx], as.character)
44
+
45
+    pathIds <- as.numeric(edgeList[, 4])
46
+    if ( pathways == 'Metabolic')
47
+        { edgeList <- edgeList[which(pathIds < 2000), ] }
48
+    if ( pathways == 'Non-Metabolic')
49
+        { edgeList <- edgeList[which(pathIds >= 2000), ] }
50
+
51
+    # Filter edgelist with gene within the RNA-seq data
52
+    idx1 <- which(edgeList[, 1] %in% rownames(mRNAexpr))
53
+    idx2 <- which(edgeList[, 2] %in% rownames(mRNAexpr))
54
+    idx <- intersect(idx1, idx2)
55
+    edgeList <- edgeList[idx, ]
56
+
57
+
58
+    return( list(mRNAexpr=mRNAexpr, edgeList=edgeList) )
59
+}
60
+