Browse code

cleaning up notes for v3

sherman5 authored on 24/01/2018 23:38:02
Showing86 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 2.99.0
2
+Version: 2.7.0
3 3
 Date: 2014-08-23
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian,
... ...
@@ -1,16 +1,18 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(CoGAPS)
4
+export(CoGapsFromCheckpoint)
4 5
 export(GWCoGAPS)
5 6
 export(binaryA)
6 7
 export(calcCoGAPSStat)
7 8
 export(calcGeneGSStat)
8 9
 export(calcZ)
9 10
 export(createGWCoGAPSSets)
11
+export(displayBuildReport)
10 12
 export(gapsMapRun)
11 13
 export(gapsRun)
12
-export(gapsRunFromCheckpoint)
13 14
 export(generateSeeds)
15
+export(getParam)
14 16
 export(patternMarkers)
15 17
 export(patternMatch4Parallel)
16 18
 export(patternMatcher)
... ...
@@ -25,6 +27,8 @@ export(reOrderBySet)
25 27
 export(reconstructGene)
26 28
 export(reorderByPatternMatch)
27 29
 export(residuals)
30
+export(setParam)
31
+exportClasses(GapsParams)
28 32
 import(doParallel)
29 33
 import(foreach)
30 34
 import(ggplot2)
... ...
@@ -52,7 +56,12 @@ importFrom(graphics,points)
52 56
 importFrom(graphics,screen)
53 57
 importFrom(graphics,split.screen)
54 58
 importFrom(graphics,title)
59
+importFrom(methods,"slot<-")
60
+importFrom(methods,callNextMethod)
55 61
 importFrom(methods,is)
62
+importFrom(methods,new)
63
+importFrom(methods,slot)
64
+importFrom(methods,validObject)
56 65
 importFrom(reshape2,melt)
57 66
 importFrom(stats,D)
58 67
 importFrom(stats,as.dist)
... ...
@@ -5,39 +5,43 @@
5 5
 #'the data matrix
6 6
 #' @param D data matrix
7 7
 #' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
8
-#' @param params GapsParams object 
9 8
 #' @return list with A and P matrix estimates
9
+#' @importFrom methods new
10 10
 #' @export
11
-CoGaps <- function(D, S, params = new('GapsParams', 7, 1000, 1000), ...)
11
+CoGAPS <- function(D, S, ...)
12 12
 {
13 13
     # process v2 style arguments for backwards compatibility
14
-    params <- oldParams(params, list(...))
14
+    extraArgs <- list(...)
15
+    params <- oldParams(new('GapsParams', 7, 1000, 1000), extraArgs)
15 16
 
16 17
     # check data
17
-    checkParamsWithData(params, nrow(D), ncol(D), nrow(S), ncol(S))
18
-    if (any(D) < 0 | any(S) < 0) # too slow for large matrices ?
18
+    if (missing(D) & length(extraArgs$data))
19
+        D <- extraArgs$data
20
+    if (missing(S) & length(extraArgs$unc))
21
+        S <- extraArgs$unc
22
+    if (any(D < 0) | any(S < 0))
19 23
         stop('D and S matrix must be non-negative')
24
+    checkParamsWithData(params, nrow(D), ncol(D), nrow(S), ncol(S))
20 25
 
21
-    # run algorithm
22
-    result <- cogaps_cpp(as.matrix(D), as.matrix(S), nFactor, alphaA,
23
-        alphaP, nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA,
24
-        max_gibbmass_paraP, fixedPatterns, whichMatrixFixed, seed, messages,
25
-        singleCellRNASeq, nOutR, numSnapshots, checkpoint_interval)
26
+    # run algorithm with call to C++ code
27
+    result <- cogaps_cpp(as.matrix(D), as.matrix(S),
28
+        as.matrix(params$fixedPatterns), params)
26 29
 
27 30
     # backwards compatible with v2
28
-    if (length(list(...)$GStoGenes))
31
+    if (!is.null(extraArgs$GStoGenes))
29 32
     {
30
-        warning('the GStoGenes argument is depracted with v3.0,')
31
-        if (list(...)$plot)
33
+        #warning('the GStoGenes argument is deprecated with v3.0')
34
+        if (is.null(extraArgs$plot) | extraArgs$plot)
32 35
             plotGAPS(result$Amean, result$Pmean)
33
-        GSP <- calcCoGAPSStat(result$Amean, result$Asd, list(...)$GStoGenes,
34
-            list(...)$nPerm)
36
+        if (is.null(extraArgs$nPerm))
37
+            extraArgs$nPerm <- 500
38
+        GSP <- calcCoGAPSStat(result$Amean, result$Asd, extraArgs$GStoGenes,
39
+            extraArgs$nPerm)
35 40
         result <- list(meanChi2=result$meanChi2, D=D, Sigma=S,
36 41
             Amean=result$Amean, Asd=result$Asd, Pmean=result$Pmean,
37 42
             Psd=result$Psd, GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg,
38 43
             GSActEst=GSP$GSActEst)
39 44
     }
40
-
41 45
     return(result)
42 46
 }
43 47
 
... ...
@@ -52,22 +56,33 @@ CoGaps <- function(D, S, params = new('GapsParams', 7, 1000, 1000), ...)
52 56
 #' @export
53 57
 CoGapsFromCheckpoint <- function(D, S, path)
54 58
 {
55
-    # TODO add checksum to make sure D,S are correct
56 59
     cogapsFromCheckpoint_cpp(D, S, path)
57 60
 }
58 61
 
62
+GWCoGapsFromCheckpoint <- function(fname)
63
+{
64
+    #TODO
65
+}
66
+
67
+#' Display Information About Package Compilation
68
+#' @export
69
+displayBuildReport <- function()
70
+{
71
+    displayBuildReport_cpp()
72
+}
73
+
59 74
 #' Backwards Compatibility with v2
60 75
 #'
61 76
 #' @param D data matrix
62 77
 #' @param S uncertainty matrix
63 78
 #' @param ... v2 style parameters
64 79
 #' @return list with A and P matrix estimates
80
+#' @importFrom methods new
65 81
 #' @export
66 82
 gapsRun <- function(D, S, ...)
67 83
 {
68
-    warning('gapsRun is depracated with v3.0, use CoGAPS')
69
-    params <- new('GapsParams', 7, 1000, 1000)
70
-    params <- oldParams(params, list(...))
84
+    #warning('gapsRun is deprecated with v3.0, use CoGAPS')
85
+    params <- oldParams(new('GapsParams', 7, 1000, 1000), list(...))
71 86
     CoGAPS(D, S, params)
72 87
 }
73 88
 
... ...
@@ -78,12 +93,12 @@ gapsRun <- function(D, S, ...)
78 93
 #' @param FP data.frame with rows giving fixed patterns for P
79 94
 #' @param ... v2 style parameters
80 95
 #' @return list with A and P matrix estimates
96
+#' @importFrom methods new
81 97
 #' @export
82 98
 gapsMapRun <- function(D, S, FP, ...)
83 99
 {
84
-    warning('gapsMapRun is depracated with v3.0, use CoGaps')
85
-    params <- new('GapsParams', 7, 1000, 1000)
86
-    params <- oldParams(params, list(...))
100
+    #warning('gapsMapRun is deprecated with v3.0, use CoGaps')
101
+    params <- oldParams(new('GapsParams', 7, 1000, 1000), list(...))
87 102
     CoGAPS(D, S, params)
88 103
 }
89 104
 
... ...
@@ -91,14 +106,15 @@ gapsMapRun <- function(D, S, FP, ...)
91 106
 oldParams <- function(params, args)
92 107
 {
93 108
     # standard params
94
-    if (length(args$nFactor))      params@nFactor    <- args$nFactor
95
-    if (length(args$nEquil))       params@nEquil     <- args$nEquil
96
-    if (length(args$nSample))      params@nSample    <- args$nSample
97
-    if (length(args$numSnapshots)) params@nSnapshots <- args$numSnapshots
109
+    if (length(args$nFactor))      params@nFactor    <- as.integer(args$nFactor)
110
+    if (length(args$nEquil))       params@nEquil     <- as.integer(args$nEquil)
111
+    if (length(args$nSample))      params@nSample    <- as.integer(args$nSample)
98 112
     if (length(args$alphaA))       params@alphaA     <- args$alphaA
99 113
     if (length(args$alphaP))       params@alphaP     <- args$alphaP
100
-    if (length(args$seed))         params@seed       <- args$seed
114
+    if (length(args$seed))         params@seed       <- as.integer(args$seed)
101 115
     if (length(args$messages))     params@messages   <- args$messages
116
+    if (length(args$numSnapshots))
117
+        params@nSnapshots <- as.integer(args$numSnapshots)
102 118
 
103 119
     # gapsMap params
104 120
     if (length(args$FP))
... ...
@@ -109,4 +125,4 @@ oldParams <- function(params, args)
109 125
 
110 126
     # return v3 style GapsParams
111 127
     return(params)    
112
-}
128
+}
113 129
\ No newline at end of file
... ...
@@ -1,43 +1,42 @@
1
-#' Genome Wide CoGAPS
1
+#' GWCoGAPS
2 2
 #'
3
-#' @details calls the C++ MCMC code and performs Bayesian
4
-#'  matrix factorization returning the two matrices that reconstruct
5
-#'  the data matrix for whole genome data;
6
-#' @param D data matrix
7
-#' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
8
-#' @param params GapsParams object 
9
-#' @param nSets number of sets for parallelization
10
-#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets.
11
-#' @param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE.
12
-#' @param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed"
13
-#' @param PatternsMatchFN function to use for pattern matching across sets
14
-#' @param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel
15
-#' @param minNS minimum of individual set contributions a cluster must contain
16
-#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}
17
-#' @export
18
-#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
3
+#'\code{GWCoGAPS} calls the C++ MCMC code and performs Bayesian
4
+#'matrix factorization returning the two matrices that reconstruct
5
+#'the data matrix for whole genome data;
6
+#'
7
+#'@param D data matrix
8
+#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
9
+#'@param nFactor number of patterns (basis vectors, metagenes), which must be
10
+#'greater than or equal to the number of rows of FP
11
+#'@param nSets number of sets for parallelization
12
+#'@param nCores number of cores for parallelization. If left to the default NA, nCores = nSets.
13
+#'@param saveBySetResults logical indicating whether to save by intermediary by set results. Default is FALSE.
14
+#'@param fname character string used to label file output. Default is "GWCoGAPS.AP.fixed"
15
+#'@param PatternsMatchFN function to use for pattern matching across sets
16
+#'@param Cut number of branches at which to cut dendrogram used in patternMatch4Parallel
17
+#'@param minNS minimum of individual set contributions a cluster must contain
18
+#'@param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}
19
+#'@export
20
+#'@seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
19 21
 #' @examples \dontrun{
20 22
 #' GWCoGAPS(nCores=NA, D, S, nFactor, nSets,saveBySetResults=TRUE, fname=fname,
21 23
 #' PatternsMatchFN = patternMatch4Parallel,numSnapshots=numSnapshots,minNS=minNS)
22 24
 #' }
23 25
 #'
24
-GWCoGAPS <- function(D, S, params, nSets, nCores=NA, saveBySetResults=FALSE,
26
+GWCoGAPS <- function(D, S, nFactor, nSets, nCores=NA, saveBySetResults=FALSE,
25 27
 fname="GWCoGAPS.AP.fixed", PatternsMatchFN = patternMatch4Parallel, Cut=NA,
26 28
 minNS=NA, ...)
27 29
 {
28
-    # v2 style parameters compatibility
29
-    params <- oldParams(params, ...)
30
-
31 30
     # establish the number of cores that you are able to use
32 31
     if(is.na(nCores))
33
-        nCores <- nSets
32
+        nCores<-nSets
34 33
     registerDoParallel(cores=nCores)
35 34
 
36 35
     # break the data into sets
37 36
     genesInSets <- createGWCoGAPSSets(data=D, nSets=nSets, keep=FALSE)
38 37
 
39 38
     # set gene min to 0
40
-    D <- sweep(D, 1, apply(D,1,function(x) pmin(0,min(x))))
39
+    D <- sweep(D, 1, apply(D,1,function(x){pmin(0,min(x))}))
41 40
 
42 41
     #generate seeds for parallelization
43 42
     nut <- generateSeeds(chains=nSets, seed=-1)
... ...
@@ -47,15 +46,14 @@ minNS=NA, ...)
47 46
     {
48 47
         D <- as.matrix(D[genesInSets[[i]],])
49 48
         S <- as.matrix(S[genesInSets[[i]],])
50
-        params <- setParam(params, 'seed', nut[i])
51
-        CoGAPS(D=D, S=S, params)
49
+        gapsRun(D=D, S=S, nFactor=nFactor,seed=nut[i],...)
52 50
     }
53 51
 
54
-    BySet <- reOrderBySet(AP=AP, nFactor=nFactor, nSets=nSets)
52
+    BySet <- reOrderBySet(AP=AP,nFactor=nFactor,nSets=nSets)
55 53
 
56 54
     #run postpattern match function
57 55
     if (is.na(Cut))
58
-        Cut <- nFactor
56
+        Cut<-nFactor
59 57
     matchedPs <- patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets,
60 58
         cnt=Cut, minNS=minNS, bySet=TRUE)
61 59
 
... ...
@@ -63,29 +61,27 @@ minNS=NA, ...)
63 61
     class(AP) <- append(class(AP),"CoGAPS")
64 62
     if (saveBySetResults==TRUE)
65 63
     {
66
-        save(AP,BySet,matchedPs,file=sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets))
64
+        save(AP, BySet, matchedPs, file=sprintf('APbySet.%s.nP%d.set%d.Rda',
65
+            fname, nFactor, nSets))
67 66
         message(sprintf('APbySet.%s.nP%d.set%d.Rda',fname, nFactor,nSets))
68 67
     }
69 68
 
70 69
     PbySet <- matchedPs[["PBySet"]]
71 70
     matchedPs <- matchedPs[[1]]
72 71
 
73
-    # generate seeds for parallelization
72
+    #generate seeds for parallelization
74 73
     nut <- generateSeeds(chains=nSets, seed=-1)
75 74
 
76
-    # final number of factors
75
+    #final number of factors
77 76
     nFactorFinal <- dim(matchedPs)[1]
78 77
 
79 78
     # run fixed CoGAPS
80
-    params <- setParam('fixedMatrix', as.matrix(matchedPs))
81
-    params <- setParam('whichMatrixFixed', 'P')
82
-    params <- setParam('nFactor', nFactorFinal)
83 79
     Fixed <- foreach(i=1:nSets) %dopar%
84 80
     {
85
-    	D <- as.matrix(D[genesInSets[[i]],])
86
-    	S <- as.matrix(S[genesInSets[[i]],])
87
-        params <- setParam(params, 'seed', nut[i])
88
-        CoGAPS(D=D, S=S, params)
81
+        D <- as.matrix(D[genesInSets[[i]],])
82
+        S <- as.matrix(S[genesInSets[[i]],])
83
+        AP <- gapsMapRun(D, S, FP=matchedPs, nFactor=nFactorFinal,
84
+            fixedMatrix = "P",seed=nut[i],...)
89 85
     }
90 86
 
91 87
     #extract A and Asds
... ...
@@ -95,120 +91,6 @@ minNS=NA, ...)
95 91
     AP.fixed <- list("Amean"=As4fixPs$A, "Asd"=As4fixPs$Asd, "Pmean"=matchedPs,
96 92
         "PbySet"=PbySet)
97 93
     class(AP.fixed) <- append(class(AP.fixed),"CoGAPS")
98
-    save(AP.fixed, file=paste(fname, ".Rda", sep=""))
99
-    message(paste(fname, ".Rda", sep=""))
100
-}
101
-
102
-#' Create Gene Sets for GWCoGAPS
103
-#'
104
-#'\code{createGWCoGAPSSets} factors whole genome data into randomly generated sets for indexing;
105
-#'
106
-#'@param data data matrix with unique rownames
107
-#'@param nSets number of sets for parallelization
108
-#'@param outRDA name of output file
109
-#'@param keep logical indicating whether or not to save gene set list. Default is TRUE.
110
-#'@export
111
-#'@return list with randomly generated sets of genes from whole genome data
112
-#'@examples \dontrun{
113
-#'createGWCoGAPSSet(D,nSets=nSets)
114
-#'}
115
-createGWCoGAPSSets <- function(data, nSets, outRDA="GenesInCoGAPSSets.Rda",
116
-keep=TRUE)
117
-{
118
-    genes <- rownames(data)
119
-    setSize <- floor(length(genes) / nSets)
120
-    genesInSets <- list()
121
-    for (set in 1:nSets)
122
-    {
123
-        if (set != nSets)
124
-            genesInSets[[set]] <- sample(genes,setSize)
125
-        if (set == nSets)
126
-            genesInSets[[set]] <- genes
127
-        genes <- genes[!genes %in% genesInSets[[set]]]
128
-    }
129
-
130
-    if (!identical(sort(unlist(genesInSets)),sort(rownames(data))))
131
-        warning("Gene identifiers not unique!")
132
-    if (keep == TRUE)
133
-        save(list=c('genesInSets'), file=outRDA)
134
-    return(genesInSets)
135
-}
136
-
137
-#' Generate Seeds for Multiple Concurrent Runs
138
-#'
139
-#' @param chains number of seeds to generate (number of chains to run)
140
-#' @param seed positive values are kept, negative values will be overwritten
141
-#'  by a seed generated from the current time
142
-#' @return vector of randomly generated seeds
143
-#' @export
144
-#' @examples
145
-#' generateSeeds(chains=2, seed=-1)
146
-generateSeeds <- function(chains=2, seed=-1)
147
-{
148
-    if (chains < 2 | (as.integer(chains) != chains))
149
-        stop("chains must be >= 2 and an integer")
150
-
151
-    if (seed < 0)
152
-    {
153
-        secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"),
154
-            units="secs"))
155
-        return(seq_len(chains) * round(secs))
156
-    }
157
-    else
158
-    {
159
-        return(seq_len(chains) * seed)
160
-    }
161
-}
162
-
163
-#' reOrderBySet
164
-#'
165
-#' @description <restructures output of gapsRun into a list containing each sets solution for Amean, Pmean, and Asd>
166
-#' @param AP output of gapsRun in parallel
167
-#' @param nFactor number of patterns
168
-#' @param nSets number of sets
169
-#'
170
-#' @return a list containing the \code{nSets} sets solution for Amean under "A", Pmean under "P", and Asd under "Asd"
171
-#' @export
172
-#'
173
-#' @examples \dontrun{
174
-#' reOrderBySet(AP,nFactor,nSets)
175
-#' }
176
-#'
177
-reOrderBySet<-function(AP, nFactor, nSets)
178
-{
179
-    P <- do.call(rbind,lapply(AP, function(x) x$Pmean))
180
-    rownames(P) <- paste(rep(1:nSets,each=nFactor),rep(1:nFactor,nSets),sep=".")
181
-    A <- lapply(AP, function(x) x$Amean)
182
-    Asd <- lapply(AP, function(x) x$Asd)
183
-    names(A) <- names(Asd) <- paste(rep("Set", nSets), rep(1:nSets), sep="")
184
-    return(list("A"=A, "Asd"=Asd, "P"=P))
185
-}
186
-
187
-#' postFixed4Parallel
188
-#'
189
-#' @param AP.fixed output of parallel gapsMapRun calls with same FP
190
-#' @param setPs data.frame with rows giving fixed patterns for P used as input for gapsMapRun
191
-#'
192
-#' @return list of two data.frames containing the A matrix estimates or their corresponding standard deviations
193
-#' from output of parallel gapsMapRun
194
-#' @export
195
-postFixed4Parallel <- function(AP.fixed=NA, setPs=NA)
196
-{
197
-    ASummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Amean))
198
-    Asd <- do.call(rbind,lapply(AP.fixed, function(x) x$Asd))
199
-    #PSummary <- do.call(rbind,lapply(AP.fixed, function(x) x$Pmean))
200
-    PSummary <- AP.fixed[[1]]$Pmean
201
-
202
-    Pmax <- apply(PSummary,1,max)
203
-    Pneu <- sweep(PSummary,1,Pmax,FUN="/")
204
-    Aneu <- sweep(ASummary,2,Pmax,FUN="*")
205
-
206
-    X <- apply(Pneu,1,range)
207
-    Y <- apply(setPs,1,range)
208
-    colnames(X) <- colnames(Y)
209
-    if (!all.equal(X, Y, tolerance=1e-5))
210
-        warning("Patterns do not match fixed values.")
211
-
212
-    As4fixPs <- list("A"=Aneu, "Asd"=Asd)
213
-    return(As4fixPs)
214
-}
94
+    save(AP.fixed, file=paste(fname,".Rda",sep=""))
95
+    message(paste(fname,".Rda",sep=""))
96
+}
215 97
\ No newline at end of file
... ...
@@ -1,12 +1,16 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval) {
5
-    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval)
4
+cogaps_cpp <- function(DMatrix, SMatrix, fixedPatterns, params) {
5
+    .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', DMatrix, SMatrix, fixedPatterns, params)
6 6
 }
7 7
 
8
-cogapsFromCheckpoint <- function(fileName) {
9
-    .Call('_CoGAPS_cogapsFromCheckpoint', PACKAGE = 'CoGAPS', fileName)
8
+cogapsFromCheckpoint_cpp <- function(D, S, fileName) {
9
+    .Call('_CoGAPS_cogapsFromCheckpoint_cpp', PACKAGE = 'CoGAPS', D, S, fileName)
10
+}
11
+
12
+displayBuildReport_cpp <- function() {
13
+    invisible(.Call('_CoGAPS_displayBuildReport_cpp', PACKAGE = 'CoGAPS'))
10 14
 }
11 15
 
12 16
 run_catch_unit_tests <- function() {
13 17
new file mode 100644
... ...
@@ -0,0 +1,24 @@
1
+#' Binary Heatmap for Standardized A Matrix
2
+#'
3
+#' @details creates a binarized heatmap of the A matrix
4
+#'  in which the value is 1 if the value in Amean is greater than
5
+#'  threshold * Asd and 0 otherwise
6
+#' @param Amean the mean estimate for the A matrix
7
+#' @param Asd the standard deviations on Amean
8
+#' @param threshold the number of standard deviations above zero
9
+#'  that an element of Amean must be to get a value of 1
10
+#' @export
11
+binaryA <-function(Amean, Asd, threshold=3)
12
+{
13
+    BinA_Map <- ifelse(Amean/Asd > threshold,1,0)
14
+    colnames(BinA_Map) <- colnames(BinA_Map, do.NULL=FALSE, prefix = "Pattern ")
15
+    rownames(BinA_Map) <- rep(" ",nrow(BinA_Map))
16
+
17
+    heatmap.2(BinA_Map, Rowv = FALSE, Colv = FALSE,dendrogram="none",
18
+        scale="none",col = brewer.pal(3,"Blues"), trace="none",
19
+        density.info="none",cexCol=1.3,srtCol=45,
20
+        lmat=rbind(c(0, 3),c(2,1),c(0,4) ),
21
+        lwid=c(1,10),lhei=c(1, 4, 1.2 ),
22
+        main="Heatmap of Standardized A Matrix")
23
+    mtext(paste("(Threshold = ", threshold, ")"))
24
+}
0 25
\ No newline at end of file
1 26
new file mode 100644
... ...
@@ -0,0 +1,127 @@
1
+#' Calculate Gene Set Statistics
2
+#'
3
+#' @details calculates the gene set statistics for each
4
+#'  column of A using a Z-score from the elements of the A matrix,
5
+#'  the input gene set, and permutation tests
6
+#' @param Amean A matrix mean values
7
+#' @param Asd A matrix standard deviations
8
+#' @param GStoGenes data.frame or list with gene sets
9
+#' @param numPerm number of permutations for null
10
+#' @export
11
+calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500)
12
+{
13
+    # test for std dev of zero, possible mostly in simple simulations
14
+    if (sum(Asd==0) > 0)
15
+    {
16
+        #temp <- min(Asd[Asd>0])
17
+        Asd[Asd==0] <- .Machine$double.eps
18
+    }
19
+
20
+    # calculate Z scores
21
+    zMatrix <- calcZ(Amean,Asd)
22
+
23
+    if (is(GStoGenes, "GSA.genesets"))
24
+    {
25
+        names(GStoGenes$genesets) <- GStoGenes$geneset.names
26
+        GStoGenes <- GStoGenes$genesets
27
+    }
28
+    else if (is(GStoGenes, "list"))
29
+    {
30
+        GStoGenesList <- GStoGenes
31
+    }
32
+    else if (is(GStoGenes, "data.frame"))
33
+    {
34
+        GStoGenesList <- list()
35
+        for (i in 1:dim(GStoGenes)[2])
36
+        {
37
+            GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- 
38
+                as.character(unique(GStoGenes[,i]))
39
+        }
40
+    }
41
+    else
42
+    {
43
+        stop(paste("GStoGenes must be a data.frame, GSA.genesets, or list with",
44
+            "format specified in the users manual."))
45
+    }
46
+
47
+    # get dimensions
48
+    numGS   <- length(names(GStoGenesList))
49
+    numPatt <- dim(zMatrix)[2]
50
+    numG    <- dim(zMatrix)[1]+0.9999
51
+
52
+    # initialize matrices
53
+    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
54
+    statsDown <- matrix(ncol = numGS, nrow = numPatt)
55
+    actEst    <- matrix(ncol = numGS, nrow = numPatt)
56
+    results   <- list()
57
+    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
58
+
59
+    # do permutation test
60
+    for (gs in 1:numGS)
61
+    {
62
+        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
63
+        index <- rownames(zMatrix) %in% genes
64
+        zValues <- zMatrix[index,1]
65
+        numGenes <- length(zValues)
66
+        label <- as.character(numGenes)
67
+
68
+        if (!any(names(results)==label))
69
+        {
70
+            for (p in 1:numPatt)
71
+            {
72
+                for (j in 1:numPerm)
73
+                {
74
+                    temp <- floor(runif(numGenes,1,numG))
75
+                    temp2 <- zMatrix[temp,p]
76
+                    zPerm[p,j] <- mean(temp2)
77
+                }
78
+            }
79
+            results[[label]] <- zPerm
80
+        }
81
+    }
82
+
83
+    # get p-value
84
+    for (p in 1:numPatt)
85
+    {
86
+        for (gs in 1:numGS)
87
+        {
88
+            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
89
+            index <- rownames(zMatrix) %in% genes
90
+            zValues <- zMatrix[index,p]
91
+            zScore <- mean(zValues)
92
+
93
+            numGenes <- length(zValues)
94
+            label <- as.character(numGenes)
95
+
96
+            permzValues <- results[[label]][p,]
97
+            ordering <- order(permzValues)
98
+            permzValues <- permzValues[ordering]
99
+
100
+            statistic <- sum(zScore > permzValues)
101
+            statUpReg <- 1 - statistic/length(permzValues)
102
+            statsUp[p,gs] <- max(statUpReg, 1/numPerm)
103
+
104
+            statistic <- sum(zScore < permzValues)
105
+            statDownReg <- 1 - statistic/length(permzValues)
106
+            statsDown[p,gs] <- max(statDownReg,1/numPerm)
107
+
108
+            activity <- 1 - 2*max(statUpReg, 1/numPerm)
109
+            actEst[p,gs] <- activity
110
+        }
111
+    }
112
+
113
+    # format output
114
+    colnames(statsUp) <- names(GStoGenesList)
115
+    colnames(statsDown) <- names(GStoGenesList)
116
+    colnames(actEst) <- names(GStoGenesList)
117
+
118
+    rownames(statsUp) <- colnames(zMatrix)
119
+    rownames(statsDown) <- colnames(zMatrix)
120
+    rownames(actEst) <- colnames(zMatrix)
121
+
122
+    results[['GSUpreg']] <- statsUp
123
+    results[['GSDownreg']] <- statsDown
124
+    results[['GSActEst']] <- actEst
125
+
126
+    return(results)
127
+}
0 128
\ No newline at end of file
1 129
new file mode 100644
... ...
@@ -0,0 +1,80 @@
1
+#' Probability Gene Belongs in Gene Set
2
+#'
3
+#' @details calculates the probability that a gene
4
+#'  listed in a gene set behaves like other genes in the set within
5
+#'  the given data set
6
+#' @param Amean A matrix mean values
7
+#' @param Asd A matrix standard deviations
8
+#' @param GSGenes data.frame or list with gene sets
9
+#' @param numPerm number of permutations for null
10
+#' @param Pw weight on genes
11
+#' @param nullGenes - logical indicating gene adjustment
12
+#' @export
13
+calcGeneGSStat  <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)),
14
+nullGenes=FALSE)
15
+{
16
+    gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), numPerm=numPerm)
17
+    gsStat <- gsStat$GSUpreg
18
+    gsStat <- -log(gsStat)
19
+
20
+    if (!all(is.na(Pw)))
21
+    {
22
+        if (length(Pw) != length(gsStat))
23
+        {
24
+            stop('Invalid weighting')
25
+        }
26
+        gsStat <- gsStat*Pw
27
+    }
28
+
29
+    if (nullGenes)
30
+    {
31
+        ZD <- Amean[setdiff(row.names(Amean), GSGenes),] /
32
+            Asd[setdiff(row.names(Amean), GSGenes),]
33
+    }
34
+    else
35
+    {
36
+        ZD <- Amean[GSGenes,]/Asd[GSGenes,]
37
+    }
38
+    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
39
+    outStats <- outStats / apply(ZD,1,sum)
40
+    outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0
41
+
42
+    if (sum(gsStat) < .Machine$double.eps)
43
+    {
44
+        return(0)
45
+    }
46
+    return(outStats)
47
+}
48
+
49
+#' Compute Gene Probability
50
+#'
51
+#' @param Amean A matrix mean values
52
+#' @param Asd A matrix standard deviations
53
+#' @param GSGenes data.frame or list with gene sets
54
+#' @param Pw weight on genes
55
+#' @param numPerm number of permutations for null
56
+#' @param PwNull - logical indicating gene adjustment
57
+computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)),
58
+numPerm=500, PwNull=FALSE)
59
+{
60
+    geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw,
61
+        GSGenes=GSGenes, numPerm=numPerm)
62
+
63
+    if (PwNull)
64
+    {
65
+        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
66
+            GSGenes=GSGenes, numPerm=numPerm, Pw=Pw,
67
+            nullGenes=TRUE)
68
+    }
69
+    else
70
+    {
71
+        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd,
72
+            GSGenes=GSGenes, numPerm=numPerm,
73
+            nullGenes=TRUE)
74
+    }
75
+
76
+    finalStats <- sapply(GSGenes, function(x)
77
+        length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
78
+
79
+    return(finalStats)
80
+}
0 81
new file mode 100644
... ...
@@ -0,0 +1,32 @@
1
+#' Compute Z-Score Matrix
2
+#'
3
+#' @details calculates the Z-score for each element based on input mean
4
+#'  and standard deviation matrices
5
+#' @param meanMat matrix of mean values
6
+#' @param sdMat matrix of standard deviation values
7
+#' @export
8
+calcZ <- function(meanMat, sdMat)
9
+{
10
+    # find matrix dimensions
11
+    nrows <- dim(meanMat)[1]
12
+    ncols <- dim(meanMat)[2]
13
+
14
+    check <- dim(sdMat)[1]
15
+    if (nrows != check)
16
+    {
17
+        stop("Number of rows in the mean and standard deviation of A do not agree.")
18
+    }
19
+
20
+    check <- dim(sdMat)[2]
21
+    if (ncols != check)
22
+    {
23
+        stop("Number of columns in the mean and standard deviation of A do not agree.")
24
+    }
25
+
26
+    # compute the matrix of z scores
27
+    zMat <- meanMat/sdMat
28
+    rownames(zMat) <- rownames(meanMat)
29
+    colnames(zMat) <- colnames(meanMat)
30
+
31
+    return(zMat)
32
+}
0 33
\ No newline at end of file
... ...
@@ -23,40 +23,44 @@
23 23
 setClass('GapsParams', slots = c(
24 24
     nFactor = 'integer',
25 25
     nEquil = 'integer',
26
+    nEquilCool = 'integer',
26 27
     nSample = 'integer',
27 28
     nOutput = 'integer',
28 29
     nSnapshots = 'integer',
29
-    alphaA  = 'numeric',
30
-    alphaP  = 'numeric',
30
+    alphaA = 'numeric',
31
+    alphaP = 'numeric',
31 32
     maxGibbmassA = 'numeric',
32 33
     maxGibbmassP = 'numeric',
33 34
     seed = 'integer',
34 35
     messages  = 'logical',
35
-    singleCelLRNASeq = 'logical',
36
+    singleCellRNASeq = 'logical',
36 37
     fixedPatterns = 'matrix',
37 38
     whichMatrixFixed = 'character',
38 39
     checkpointInterval = 'integer',
39 40
     nCores = 'integer'
40 41
 ))
41 42
 
43
+#' @importFrom methods callNextMethod
42 44
 setMethod('initialize', 'GapsParams', 
43 45
     function(.Object, nFactor, nEquil, nSample, ...)
44 46
     {
45
-        .Object@nFactor <- nFactor
46
-        .Object@nEquil <- nEquil
47
-        .Object@nSample <- nSample
48
-        .Object@nOutput <- 1000
49
-        .Object@numSnapshots <- 0
47
+        .Object@nFactor <- as.integer(nFactor)
48
+        .Object@nEquil <- as.integer(nEquil)
49
+        .Object@nEquil <- as.integer(floor(nEquil/10))
50
+        .Object@nSample <- as.integer(nSample)
51
+        .Object@nOutput <- as.integer(1000)
52
+        .Object@nSnapshots <- as.integer(0)
50 53
         .Object@alphaA <- 0.01
51 54
         .Object@alphaP <- 0.01
52 55
         .Object@maxGibbmassA <- 100.0
53 56
         .Object@maxGibbmassP <- 100.0
54
-        .Object@seed <- -1
57
+        .Object@seed <- as.integer(-1)
55 58
         .Object@messages <- TRUE
56
-        .Object@singleCelLRNASeq <- FALSE
59
+        .Object@singleCellRNASeq <- FALSE
57 60
         .Object@fixedPatterns <- matrix(0)
58 61
         .Object@whichMatrixFixed <- 'N'
59
-        .Object@checkpointInterval <- 0
62
+        .Object@checkpointInterval <- as.integer(0)
63
+        .Object@nCores <- as.integer(1)
60 64
 
61 65
         .Object <- callNextMethod(.Object, ...)
62 66
         .Object
... ...
@@ -73,14 +77,24 @@ setValidity('GapsParams',
73 77
     }
74 78
 )
75 79
 
76
-#' @rdname getParam
80
+##################### Generics ###################
81
+
82
+#' @export
83
+setGeneric('getParam', function(object, name)
84
+    {standardGeneric('getParam')})
85
+
86
+#' @export
87
+setGeneric('setParam', function(object, name, value)
88
+    {standardGeneric('setParam')})
89
+
90
+##################### Methods ####################
91
+
77 92
 #' @importFrom methods slot
78 93
 setMethod("getParam", "GapsParams", function(object, name)
79 94
 {
80 95
     slot(object, name)
81 96
 })
82 97
 
83
-#' @rdname setParam
84 98
 #' @importFrom methods slot<- validObject
85 99
 setMethod("setParam", "GapsParams", function(object, name, value)
86 100
 {
... ...
@@ -89,7 +103,6 @@ setMethod("setParam", "GapsParams", function(object, name, value)
89 103
     return(object)
90 104
 })
91 105
 
92
-#' @importFrom methods slotNames
93 106
 setMethod("show", "GapsParams", function(object)
94 107
 {
95 108
     cat("CoGAPS Parameters Object\n")
... ...
@@ -104,4 +117,4 @@ checkParamsWithData <- function(params, nRD, nCD, nRS, nCS)
104 117
         stop('invalid fixed pattern length')
105 118
     if (params@whichMatrixFixed == 'P' & ncol(params@fixedPatterns) != nCD)
106 119
         stop('invalid fixed pattern length')
107
-})
120
+}
108 121
new file mode 100644
... ...
@@ -0,0 +1,34 @@
1
+#' Create Gene Sets for GWCoGAPS
2
+#'
3
+#' @details factors whole genome data into randomly generated sets for indexing
4
+#'
5
+#' @param data data matrix with unique rownames
6
+#' @param nSets number of sets for parallelization
7
+#' @param outRDA name of output file
8
+#' @param keep logical indicating whether or not to save gene set list.
9
+#' @return list with randomly generated sets of genes from whole genome data
10
+#' @examples \dontrun{
11
+#'createGWCoGAPSSet(D,nSets=nSets)
12
+#'}
13
+#' @export
14
+createGWCoGAPSSets<-function(data=D, nSets=nSets,
15
+outRDA="GenesInCoGAPSSets.Rda", keep=TRUE)
16
+{
17
+    genes=rownames(data)
18
+    setSize=floor(length(genes)/nSets)
19
+    genesInSets <- list()
20
+    for (set in 1:nSets)
21
+    {
22
+        if(set!=nSets)
23
+            genesInSets[[set]] <- sample(genes,setSize)
24
+        if(set==nSets)
25
+            genesInSets[[set]] <- genes
26
+        genes=genes[!genes%in%genesInSets[[set]]]
27
+    }
28
+    if (!identical(sort(unlist(genesInSets)),sort(rownames(data))))
29
+        message("Gene identifiers not unique!")
30
+    if (keep==TRUE)
31
+        save(list=c('genesInSets'),file=outRDA)
32
+    return(genesInSets)
33
+}
34
+
0 35
new file mode 100644
... ...
@@ -0,0 +1,144 @@
1
+#Calculates significant genes in each pattern according to certain threshold
2
+#Returns the significant gene names as well as well as the means of these matrices and number of genes in each
3
+
4
+gapsInterPattern <- function(Amean, Asd, sdThreshold = 3)
5
+{
6
+    #number of rows and cols of Asd
7
+    numGenes = length(Asd[,1]);
8
+    numCols = length(Asd[1,]);
9
+
10
+    #Vector holding the number of each significant gene in each column
11
+    sigGeneNums = data.frame();
12
+
13
+    #Temp number of sig genes in the col
14
+    sigCount = 0;
15
+
16
+    #Number to catch the amount of columns without significant genes
17
+    numEmptyCols = 0;
18
+
19
+    #Keep an array of the significant gene counts
20
+    significantGeneNums = c(0);
21
+
22
+    #Names of the genes from the data matrix
23
+    geneNames = names(Asd[,1]);
24
+
25
+    #Names of the genes that are significant from the data matrix
26
+    sigGeneNames = "0";
27
+
28
+    #The numerator of our statistic
29
+    dimensionStatNumerator = 0;
30
+
31
+    #The denominator of our statistic
32
+    dimensionStatDenominator = 0;
33
+
34
+    #The value of our statistic
35
+    dimensionStatistic = 0;
36
+
37
+    #A matrix holding the values of our statistics
38
+    dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols);
39
+
40
+    #The mean of the statistic matrix
41
+    sbar = 0;
42
+
43
+    #A list to return the significant genes in each col and the statistic matrix
44
+    results = list(list());
45
+
46
+    #Scan in the significant genes from each column of Asd
47
+    #The columns of sigGeneNums hold the significant genes for each col of Asd
48
+    for(i in 1:numCols)
49
+    {
50
+        sigCount = 0;
51
+        for(j in 1:numGenes)
52
+        {
53
+            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
54
+            {
55
+                sigCount = sigCount + 1;
56
+                sigGeneNums[sigCount, i] = j;
57
+            }
58
+        }
59
+
60
+        if(sigCount == 0)
61
+        {
62
+            sigGeneNums[1, i] = 0;
63
+            numEmptyCols = numEmptyCols + 1;
64
+        }
65
+
66
+        #Save the number of sigGenes
67
+        significantGeneNums[i] = sigCount;
68
+
69
+        #Get the names and store them
70
+        if(sigCount != 0)
71
+        {
72
+            for(k in 1:sigCount)
73
+            {
74
+                sigGeneNames[k] = geneNames[sigGeneNums[k, i]];
75
+            }
76
+            results[[1]][[i]] = sigGeneNames;
77
+            sigGeneNames = "0";
78
+        }
79
+        else
80
+        {
81
+            results[[1]][[i]] = "None";
82
+        }
83
+    }
84
+
85
+    if(any(significantGeneNums == 0))
86
+    {
87
+        zeroSigCols = which(significantGeneNums == 0);
88
+        print("Warning: No Significant Genes in Pattern(s): ");
89
+
90
+        for(z in 1:length(zeroSigCols))
91
+        {
92
+            print(zeroSigCols[z]);
93
+        }
94
+    }
95
+
96
+    #Now that we have the significant genes want to see if these genes are significant in other columns
97
+    #Fill across the row then down the column
98
+
99
+    #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i'
100
+    for(i in 1:numCols)
101
+    {
102
+        for(j in 1:numCols)
103
+        {
104
+            #Grab the number of significant genes from the interested column
105
+            sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
106
+
107
+            if(sigCount != 0)
108
+            {
109
+                dimensionStatDenominator = sigCount;
110
+                dimensionStatNumerator = 0;
111
+
112
+                #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd.
113
+                #Find the significance of these genes, Amean - threshold * Asd
114
+                for(k in 1:sigCount)
115
+                {
116
+                    if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0)
117
+                    {
118
+                        dimensionStatNumerator = dimensionStatNumerator + 1;
119
+                    }
120
+                }
121
+
122
+                dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator;
123
+
124
+                dimensionStatisticMatrix[i, j] = dimensionStatistic;
125
+            }
126
+            else
127
+            {
128
+                dimensionStatisticMatrix[i, j] = 0;
129
+            }
130
+        }
131
+    }
132
+
133
+    #Find mean of the matrices (excluding the diagonal elements)
134
+    #we can subtract numCols as the diagonal elements are 1
135
+    sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols)));
136
+
137
+    results[[2]] = significantGeneNums;
138
+    results[[3]] = t(dimensionStatisticMatrix);
139
+    results[[4]] = sbar;
140
+
141
+    names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue");
142
+
143
+    return(results);
144
+}
0 145
\ No newline at end of file
1 146
new file mode 100644
... ...
@@ -0,0 +1,135 @@
1
+#Calculates significant genes in each pattern according to certain threshold
2
+#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices
3
+
4
+gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3)
5
+{
6
+    #number of rows and cols of Asd
7
+    numGenes = length(Asd[,1]);
8
+    numCols = length(Asd[1,]);
9
+
10
+    #number of samples in DMatrix
11
+    numSamp = ncol(DMatrix);
12
+
13
+    #Vector holding the number of each significant gene in each column
14
+    sigGeneNums = data.frame();
15
+
16
+    #Temp number of sig genes in the col
17
+    sigCount = 0;
18
+
19
+    #Keep an array of the significant gene counts
20
+    significantGeneNums = c(0);
21
+
22
+    #A matrix to hold the significant genes in D for the current pattern
23
+    #The matrix just acts as a subset of D, just eliminates non relevant rows
24
+    tempSubsetD = matrix();
25
+
26
+    #A matrix holding the values of our correlation coefficients between genes for the current column
27
+    tempGeneCorrMatrix = matrix();
28
+
29
+    #A list to hold all the correlation matrices
30
+    geneCorrMatrices = list();
31
+
32
+    #A list to hold all the means
33
+    geneCorrMatrMeans = list();
34
+
35
+    #The mean of all the correlation matrices
36
+    cbar = 0;
37
+
38
+    #A list to return the means and the matrices
39
+    results = list();
40
+
41
+    #Scan in the significant genes from each column of Asd
42
+    #The columns of sigGeneNums hold the significant genes for each col of Asd
43
+    for(i in 1:numCols)
44
+    {
45
+        sigCount = 0;
46
+        for(j in 1:numGenes)
47
+        {
48
+            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
49
+            {
50
+                sigCount = sigCount + 1;
51
+                sigGeneNums[sigCount, i] = j;
52
+            }
53
+        }
54
+
55
+        if(sigCount == 0)
56
+        {
57
+            sigGeneNums[1, i] = 0;
58
+        }
59
+
60
+        #Save the number of sigGenes
61
+        significantGeneNums[i] = sigCount;
62
+    }
63
+
64
+    #If a pattern has no significant genes this is clearly an error so return such
65
+    if(any(significantGeneNums == 0))
66
+    {
67
+        zeroSigCols = which(significantGeneNums == 0);
68
+        warning("Warning: No Significant Genes in Pattern(s): ");
69
+
70
+        for(z in 1:length(zeroSigCols))
71
+        {
72
+            message(zeroSigCols[z]);
73
+        }
74
+    }
75
+
76
+
77
+    #Now that we have the significant genes want to grab these from our original D matrix
78
+    #and find the sigGene x sigGene correlation matrix and find its mean
79
+
80
+    for(j in 1:numCols)
81
+    {
82
+        #Grab the number of significant genes from the interested column
83
+        sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
84
+
85
+        if(sigCount != 0)
86
+        {
87
+
88
+            #loop through the number of significant genes and pull out the rows of D that represent these genes.
89
+            #Then find the correlation between them with the built in R corr function
90
+            tempSubsetD = matrix(nrow = sigCount, ncol = numSamp);
91
+            for(k in 1:sigCount)
92
+            {
93
+                #Subset D based on significant Genes
94
+                #need to transpose as it reads this in as column vector otherwise
95
+                tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]);
96
+            }
97
+
98
+            #Find the correlation between these genes in D
99
+            #Need to transpose as it calculates correlations between the columns
100
+            tempGeneCorrMatrix = cor(t(tempSubsetD));
101
+
102
+            #Find the mean of this matrix
103
+            tempGeneCorrMatrMean = mean(tempGeneCorrMatrix);
104
+
105
+        }
106
+        else
107
+        {
108
+            tempGeneCorrMatrix = 0;
109
+            tempGeneCorrMatrMean = 0;
110
+        }
111
+
112
+        #Save these in the overall list
113
+        geneCorrMatrices[[j]] = tempGeneCorrMatrix;
114
+        geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean;
115
+
116
+    }
117
+
118
+    #Return as an overall list of lists
119
+    # We return Corr Matrices themselves, their means, and the means of the means (cbar)
120
+    results[[1]] = geneCorrMatrices;
121
+    results[[2]] = geneCorrMatrMeans;
122
+
123
+    #Return as an overall list of lists
124
+    for(i in 1:numCols)
125
+    {
126
+        cbar = cbar + results[[2]][[i]];
127
+    }
128
+
129
+    cbar = cbar/numCols;
130
+    results[[3]] = cbar;
131
+
132
+    names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue");
133
+
134
+    return(results);
135
+}
0 136
\ No newline at end of file
1 137
deleted file mode 100644
... ...
@@ -1,237 +0,0 @@
1
-# Output: list with gene set statistics
2
-#' Calculate Gene Set Statistics
3
-#'
4
-#' @details calculates the gene set statistics for each
5
-#'  column of A using a Z-score from the elements of the A matrix,
6
-#'  the input gene set, and permutation tests
7
-#' @param Amean A matrix mean values
8
-#' @param Asd A matrix standard deviations
9
-#' @param GStoGenes data.frame or list with gene sets
10
-#' @param numPerm number of permutations for null
11
-#' @export
12
-calcCoGAPSStat <- function (Amean, Asd, GStoGenes, numPerm=500)
13
-{
14
-    # test for std dev of zero, possible mostly in simple simulations
15
-    if (sum(Asd==0) > 0)
16
-        Asd[Asd==0] <- .Machine$double.eps
17
-
18
-    # calculate Z scores
19
-    zMatrix <- calcZ(Amean,Asd)
20
-
21
-    # check input arguments
22
-    if (!is(GStoGenes, "data.frame") && !is(GStoGenes, "list") && !is(GStoGenes,"GSA.genesets"))
23
-        stop("GStoGenes must be a data.frame,GSA.genesets, or list with format specified in the users manual.")
24
-
25
-    if (is(GStoGenes, "GSA.genesets"))
26
-    {
27
-        names(GStoGenes$genesets) <- GStoGenes$geneset.names
28
-        GStoGenes <- GStoGenes$genesets
29
-    }
30
-
31
-    if (is(GStoGenes, "list"))
32
-        GStoGenesList <- GStoGenes
33
-    else
34
-        GStoGenesList <- list()
35
-
36
-    for (i in 1:dim(GStoGenes)[2])
37
-    {
38
-        GStoGenesList[[as.character(colnames(GStoGenes)[i])]] <- as.character(unique(GStoGenes[,i]))
39
-    }
40
-
41
-    # get dimensions
42
-    numGS   <- length(names(GStoGenesList))
43
-    numPatt <- dim(zMatrix)[2]
44
-    numG    <- dim(zMatrix)[1]+0.9999
45
-
46
-    # initialize matrices
47
-    statsUp   <- matrix(ncol = numGS, nrow = numPatt)
48
-    statsDown <- matrix(ncol = numGS, nrow = numPatt)
49
-    actEst    <- matrix(ncol = numGS, nrow = numPatt)
50
-    results   <- list()
51
-    zPerm     <- matrix(ncol=numPerm,nrow=numPatt)
52
-
53
-    # do permutation test
54
-    for (gs in 1:numGS)
55
-    {
56
-        genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
57
-        index <- rownames(zMatrix) %in% genes
58
-        zValues <- zMatrix[index,1]
59
-        numGenes <- length(zValues)
60
-        label <- as.character(numGenes)
61
-
62
-        if (!any(names(results)==label))
63
-        {
64
-            for (p in 1:numPatt)
65
-            {
66
-                for (j in 1:numPerm)
67
-                {
68
-                    temp <- floor(runif(numGenes,1,numG))
69
-                    temp2 <- zMatrix[temp,p]
70
-                    zPerm[p,j] <- mean(temp2)
71
-                }
72
-            }
73
-            results[[label]] <- zPerm
74
-        }
75
-    }
76
-
77
-    # get p-value
78
-    for (p in 1:numPatt)
79
-    {
80
-        for (gs in 1:numGS)
81
-        {
82
-            genes <- GStoGenesList[[names(GStoGenesList)[gs]]]
83
-            index <- rownames(zMatrix) %in% genes
84
-            zValues <- zMatrix[index,p]
85
-            zScore <- mean(zValues)
86
-
87
-            numGenes <- length(zValues)
88
-            label <- as.character(numGenes)
89
-
90
-            permzValues <- results[[label]][p,]
91
-            ordering <- order(permzValues)
92
-            permzValues <- permzValues[ordering]
93
-
94
-            statistic <- sum(zScore > permzValues)
95
-            statUpReg <- 1 - statistic/length(permzValues)
96
-            statsUp[p,gs] <- max(statUpReg, 1/numPerm)
97
-
98
-            statistic <- sum(zScore < permzValues)
99
-            statDownReg <- 1 - statistic/length(permzValues)
100
-            statsDown[p,gs] <- max(statDownReg,1/numPerm)
101
-
102
-            activity <- 1 - 2*max(statUpReg, 1/numPerm)
103
-            actEst[p,gs] <- activity
104
-        }
105
-    }
106
-
107
-    # format output
108
-    colnames(statsUp) <- names(GStoGenesList)
109
-    colnames(statsDown) <- names(GStoGenesList)
110
-    colnames(actEst) <- names(GStoGenesList)
111
-
112
-    rownames(statsUp) <- colnames(zMatrix)
113
-    rownames(statsDown) <- colnames(zMatrix)
114
-    rownames(actEst) <- colnames(zMatrix)
115
-
116
-    results[['GSUpreg']] <- statsUp
117
-    results[['GSDownreg']] <- statsDown
118
-    results[['GSActEst']] <- actEst
119
-
120
-    return(results)
121
-}
122
-
123
-# calcGeneGSStat: calculate probability gene belongs in gene set
124
-# History: v1.0 EJF original CoGAPS
125
-
126
-# Inputs: Amean - A matrix mean values
127
-#         Asd - A matrix standard deviations
128
-#         GStoGenes - data.frame or list with gene sets
129
-#         numPerm - number of permutations for null
130
-#         Pw - weighting on genes
131
-#         nullGenes - adjust genes
132
-
133
-# Output: environment with gene set statistics
134
-#       NOTE: should make into list object, env historical
135
-
136
-#'\code{calcGeneGSStat} calculates the probability that a gene
137
-#'listed in a gene set behaves like other genes in the set within
138
-#'the given data set
139
-#'
140
-#'@param Amean A matrix mean values
141
-#'@param Asd A matrix standard deviations
142
-#'@param GSGenes data.frame or list with gene sets
143
-#'@param numPerm number of permutations for null
144
-#'@param Pw weight on genes
145
-#'@param nullGenes - logical indicating gene adjustment
146
-#'@export
147
-calcGeneGSStat  <- function(Amean, Asd, GSGenes, numPerm, Pw=rep(1,ncol(Amean)), nullGenes=FALSE)
148
-{
149
-    gsStat <- calcCoGAPSStat(Amean, Asd, data.frame(GSGenes), numPerm=numPerm)
150
-    gsStat <- gsStat$GSUpreg
151
-
152
-    gsStat <- -log(gsStat)
153
-
154
-    if (!all(is.na(Pw)))
155
-    {
156
-        if (length(Pw) != length(gsStat))
157
-        {
158
-            stop('Invalid weighting')
159
-        }
160
-        gsStat <- gsStat*Pw
161
-    }
162
-
163
-    if (nullGenes)
164
-    {
165
-        ZD <- Amean[setdiff(row.names(Amean), GSGenes),] /
166
-            Asd[setdiff(row.names(Amean), GSGenes),]
167
-    }
168
-    else
169
-    {
170
-        ZD <- Amean[GSGenes,]/Asd[GSGenes,]
171
-    }
172
-    outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
173
-    outStats <- outStats / apply(ZD,1,sum)
174
-    outStats[which(apply(ZD,1,sum) < .Machine$double.eps)] <- 0
175
-
176
-    if (sum(gsStat) < .Machine$double.eps)
177
-        return(0)
178
-    return(outStats)
179
-}
180
-
181
-computeGeneGSProb <- function(Amean, Asd, GSGenes, Pw=rep(1,ncol(Amean)),
182
-numPerm=500, PwNull=FALSE)
183
-{
184
-    geneGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, Pw=Pw,
185
-        GSGenes=GSGenes, numPerm=numPerm)
186
-
187
-    if (PwNull)
188
-    {
189
-        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, GSGenes=GSGenes,
190
-            numPerm=numPerm, Pw=Pw, nullGenes=T)
191
-    }
192
-    else
193
-    {
194
-        permGSStat <- calcGeneGSStat(Amean=Amean, Asd=Asd, GSGenes=GSGenes,
195
-        numPerm=numPerm, nullGenes=T)
196
-    }
197
-
198
-    return(sapply(GSGenes, function(x)
199
-        length(which(permGSStat > geneGSStat[x])) / length(permGSStat)))
200
-}
201
-
202
-#' Compute Z-Score Matrix
203
-#'
204
-#' @details calculates the Z-score for each element based on input mean
205
-#'  and standard deviation matrices
206
-#' @param meanMat matrix of mean values
207
-#' @param sdMat matrix of standard deviation values
208
-#' @export
209
-calcZ <- function (meanMat, sdMat)
210
-{
211
-    if (nrow(meanMat) != nrow(sdMat))
212
-        stop("Number of rows in the mean and standard deviation of A do not agree.")
213
-    if (ncol(meanMat) != ncol(sdMat))
214
-        stop("Number of columns in the mean and standard deviation of A do not agree.")
215
-
216
-    zMat <- meanMat / sdMat
217
-    rownames(zMat) <- rownames(meanMat)
218
-    colnames(zMat) <- colnames(meanMat)
219
-
220
-    return(zMat)
221
-}
222
-
223
-#' Reconstruct Gene
224
-#'
225
-#' @param A A matrix estimates
226
-#' @param P corresponding P matrix estimates
227
-#' @param genes an index of the gene or genes of interest
228
-#' @return the D' estimate of a gene or set of genes
229
-#' @export
230
-reconstructGene<-function(A=NA, P=NA, genes=NA)
231
-{
232
-    Dneu <- A %*% P
233
-    if (!is.na(genes))
234
-        Dneu <- Dneu[genes,]
235
-    return(Dneu)
236
-}
237
-
238 0
new file mode 100644
... ...
@@ -0,0 +1,30 @@
1
+#' Generate Seeds for Multiple Concurrent Runs
2
+#'
3
+#' @param chains number of seeds to generate (number of chains to run)
4
+#' @param seed positive values are kept, negative values will be overwritten
5
+#'  by a seed generated from the current time
6
+#' @return vector of randomly generated seeds
7
+#' @export
8
+#' @examples
9
+#' generateSeeds(chains=2, seed=-1)
10
+generateSeeds <- function(chains=2, seed=-1)
11
+{
12
+    if (chains < 2 || (as.integer(chains) != chains))
13
+    {
14
+        stop("chains must be >= 2 and an integer")
15
+    }
16
+
17
+    if (seed < 1)
18
+    {
19
+        secs <- as.numeric(difftime(Sys.time(), paste(Sys.Date(), "00:00"),
20
+            units="secs"))
21
+        secs <- round(secs)
22
+        seeds <- seq_len(chains) * secs
23
+        return(seeds)
24
+    }
25
+    else
26
+    {
27
+        seeds <- seq_len(chains) * seed
28
+        return(seeds)
29
+    }
30
+}
... ...
@@ -8,8 +8,8 @@
8 8
 #' \tabular{ll}{
9 9
 #' Package: \tab CoGAPS\cr
10 10
 #' Type: \tab Package\cr
11
-#' Version: \tab 1.0\cr
12
-#' Date: \tab 2014-07-23\cr
11
+#' Version: \tab 2.99.0\cr
12
+#' Date: \tab 2018-01-24\cr
13 13
 #' License: \tab LGPL\cr
14 14
 #' }
15 15
 #' @author Maintainer: Elana J. Fertig \email{ejfertig@jhmi.edu},
16 16
deleted file mode 100644
... ...
@@ -1,700 +0,0 @@
1
-#Calculates significant genes in each pattern according to certain threshold
2
-#Returns the significant gene names as well as well as the means of these matrices and number of genes in each
3
-
4
-gapsInterPattern <- function(Amean, Asd, sdThreshold = 3)
5
-{
6
-    #number of rows and cols of Asd
7
-    numGenes = length(Asd[,1]);
8
-    numCols = length(Asd[1,]);
9
-
10
-    #Vector holding the number of each significant gene in each column
11
-    sigGeneNums = data.frame();
12
-
13
-    #Temp number of sig genes in the col
14
-    sigCount = 0;
15
-
16
-    #Number to catch the amount of columns without significant genes
17
-    numEmptyCols = 0;
18
-
19
-    #Keep an array of the significant gene counts
20
-    significantGeneNums = c(0);
21
-
22
-    #Names of the genes from the data matrix
23
-    geneNames = names(Asd[,1]);
24
-
25
-    #Names of the genes that are significant from the data matrix
26
-    sigGeneNames = "0";
27
-
28
-    #The numerator of our statistic
29
-    dimensionStatNumerator = 0;
30
-
31
-    #The denominator of our statistic
32
-    dimensionStatDenominator = 0;
33
-
34
-    #The value of our statistic
35
-    dimensionStatistic = 0;
36
-
37
-    #A matrix holding the values of our statistics
38
-    dimensionStatisticMatrix = matrix(nrow = numCols, ncol = numCols);
39
-
40
-    #The mean of the statistic matrix
41
-    sbar = 0;
42
-
43
-    #A list to return the significant genes in each col and the statistic matrix
44
-    results = list(list());
45
-
46
-    #Scan in the significant genes from each column of Asd
47
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
48
-    for(i in 1:numCols)
49
-    {
50
-        sigCount = 0;
51
-        for(j in 1:numGenes)
52
-        {
53
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
54
-            {
55
-                sigCount = sigCount + 1;
56
-                sigGeneNums[sigCount, i] = j;
57
-            }
58
-        }
59
-
60
-        if(sigCount == 0)
61
-        {
62
-            sigGeneNums[1, i] = 0;
63
-            numEmptyCols = numEmptyCols + 1;
64
-        }
65
-
66
-        #Save the number of sigGenes
67
-        significantGeneNums[i] = sigCount;
68
-
69
-        #Get the names and store them
70
-        if(sigCount != 0)
71
-        {
72
-            for(k in 1:sigCount)
73
-            {
74
-                sigGeneNames[k] = geneNames[sigGeneNums[k, i]];
75
-            }
76
-            results[[1]][[i]] = sigGeneNames;
77
-            sigGeneNames = "0";
78
-        }
79
-        else
80
-        {
81
-            results[[1]][[i]] = "None";
82
-        }
83
-    }
84
-
85
-    if(any(significantGeneNums == 0))
86
-    {
87
-        zeroSigCols = which(significantGeneNums == 0);
88
-        print("Warning: No Significant Genes in Pattern(s): ");
89
-
90
-        for(z in 1:length(zeroSigCols))
91
-        {
92
-            print(zeroSigCols[z]);
93
-        }
94
-    }
95
-
96
-    #Now that we have the significant genes want to see if these genes are significant in other columns
97
-    #Fill across the row then down the column
98
-
99
-    #This compares the significant genes in the specified by 'j' to the same genes in Asd in the column specified by 'i'
100
-    for(i in 1:numCols)
101
-    {
102
-        for(j in 1:numCols)
103
-        {
104
-            #Grab the number of significant genes from the interested column
105
-            sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
106
-
107
-            if(sigCount != 0)
108
-            {
109
-                dimensionStatDenominator = sigCount;
110
-                dimensionStatNumerator = 0;
111
-
112
-                #loop through the number of significant genes and compare to these genes in the 'ith' col of Asd.
113
-                #Find the significance of these genes, Amean - threshold * Asd
114
-                for(k in 1:sigCount)
115
-                {
116
-                    if((Amean[sigGeneNums[k,j],i] - (sdThreshold*Asd[sigGeneNums[k,j],i])) > 0)
117
-                    {
118
-                        dimensionStatNumerator = dimensionStatNumerator + 1;
119
-                    }
120
-                }
121
-
122
-                dimensionStatistic = dimensionStatNumerator/dimensionStatDenominator;
123
-
124
-                dimensionStatisticMatrix[i, j] = dimensionStatistic;
125
-            }
126
-            else
127
-            {
128
-                dimensionStatisticMatrix[i, j] = 0;
129
-            }
130
-        }
131
-    }
132
-
133
-    #Find mean of the matrices (excluding the diagonal elements)
134
-    #we can subtract numCols as the diagonal elements are 1
135
-    sbar = ((sum(dimensionStatisticMatrix) - (numCols - numEmptyCols))/(length(dimensionStatisticMatrix) - (numCols - numEmptyCols)));
136
-
137
-    results[[2]] = significantGeneNums;
138
-    results[[3]] = t(dimensionStatisticMatrix);
139
-    results[[4]] = sbar;
140
-
141
-    names(results) = c("SignificantGeneNames", "SignificantGeneTotals", "SeparationMatrix", "InterPatternValue");
142
-
143
-    return(results);
144
-}
145
-
146
-#Calculates significant genes in each pattern according to certain threshold
147
-#Returns the significant gene names as well as well as the correlation matrices between these genes and the means of these matrices
148
-
149
-gapsIntraPattern <- function(Amean, Asd, DMatrix, sdThreshold = 3)
150
-{
151
-    #number of rows and cols of Asd
152
-    numGenes = length(Asd[,1]);
153
-    numCols = length(Asd[1,]);
154
-
155
-    #number of samples in DMatrix
156
-    numSamp = ncol(DMatrix);
157
-
158
-    #Vector holding the number of each significant gene in each column
159
-    sigGeneNums = data.frame();
160
-
161
-    #Temp number of sig genes in the col
162
-    sigCount = 0;
163
-
164
-    #Keep an array of the significant gene counts
165
-    significantGeneNums = c(0);
166
-
167
-    #A matrix to hold the significant genes in D for the current pattern
168
-    #The matrix just acts as a subset of D, just eliminates non relevant rows
169
-    tempSubsetD = matrix();
170
-
171
-    #A matrix holding the values of our correlation coefficients between genes for the current column
172
-    tempGeneCorrMatrix = matrix();
173
-
174
-    #A list to hold all the correlation matrices
175
-    geneCorrMatrices = list();
176
-
177
-    #A list to hold all the means
178
-    geneCorrMatrMeans = list();
179
-
180
-    #The mean of all the correlation matrices
181
-    cbar = 0;
182
-
183
-    #A list to return the means and the matrices
184
-    results = list();
185
-
186
-    #Scan in the significant genes from each column of Asd
187
-    #The columns of sigGeneNums hold the significant genes for each col of Asd
188
-    for(i in 1:numCols)
189
-    {
190
-        sigCount = 0;
191
-        for(j in 1:numGenes)
192
-        {
193
-            if((Amean[j,i] - (sdThreshold*Asd[j,i])) > 0)
194
-            {
195
-                sigCount = sigCount + 1;
196
-                sigGeneNums[sigCount, i] = j;
197
-            }
198
-        }
199
-
200
-        if(sigCount == 0)
201
-        {
202
-            sigGeneNums[1, i] = 0;
203
-        }
204
-
205
-        #Save the number of sigGenes
206
-        significantGeneNums[i] = sigCount;
207
-    }
208
-
209
-    #If a pattern has no significant genes this is clearly an error so return such
210
-    if(any(significantGeneNums == 0))
211
-    {
212
-        zeroSigCols = which(significantGeneNums == 0);
213
-        warning("Warning: No Significant Genes in Pattern(s): ");
214
-
215
-        for(z in 1:length(zeroSigCols))
216
-        {
217
-            message(zeroSigCols[z]);
218
-        }
219
-    }
220
-
221
-
222
-    #Now that we have the significant genes want to grab these from our original D matrix
223
-    #and find the sigGene x sigGene correlation matrix and find its mean
224
-
225
-    for(j in 1:numCols)
226
-    {
227
-        #Grab the number of significant genes from the interested column
228
-        sigCount = sum(sigGeneNums[,j] > 0, na.rm = TRUE);
229
-
230
-        if(sigCount != 0)
231
-        {
232
-
233
-            #loop through the number of significant genes and pull out the rows of D that represent these genes.
234
-            #Then find the correlation between them with the built in R corr function
235
-            tempSubsetD = matrix(nrow = sigCount, ncol = numSamp);
236
-            for(k in 1:sigCount)
237
-            {
238
-                #Subset D based on significant Genes
239
-                #need to transpose as it reads this in as column vector otherwise
240
-                tempSubsetD[k,] = t(DMatrix[sigGeneNums[k,j], ]);
241
-            }
242
-
243
-            #Find the correlation between these genes in D
244
-            #Need to transpose as it calculates correlations between the columns
245
-            tempGeneCorrMatrix = cor(t(tempSubsetD));
246
-
247
-            #Find the mean of this matrix
248
-            tempGeneCorrMatrMean = mean(tempGeneCorrMatrix);
249
-
250
-        }
251
-        else
252
-        {
253
-            tempGeneCorrMatrix = 0;
254
-            tempGeneCorrMatrMean = 0;
255
-        }
256
-
257
-        #Save these in the overall list
258
-        geneCorrMatrices[[j]] = tempGeneCorrMatrix;
259
-        geneCorrMatrMeans[[j]] = tempGeneCorrMatrMean;
260
-
261
-    }
262
-
263
-    #Return as an overall list of lists
264
-    # We return Corr Matrices themselves, their means, and the means of the means (cbar)
265
-    results[[1]] = geneCorrMatrices;
266
-    results[[2]] = geneCorrMatrMeans;
267
-
268
-    #Return as an overall list of lists
269
-    for(i in 1:numCols)
270
-    {
271
-        cbar = cbar + results[[2]][[i]];
272
-    }
273
-
274
-    cbar = cbar/numCols;
275
-    results[[3]] = cbar;
276
-
277
-    names(results) = c("CorrelationMatrices", "CorrelationMatrixMeans", "IntraPatternValue");
278
-
279
-    return(results);
280
-}
281
-
282
-
283
-#' patternMarkers
284
-#'
285
-#' @param Amatrix A matrix of genes by weights resulting from CoGAPS or other NMF decomposition
286
-#' @param scaledPmatrix logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition
287
-#' @param Pmatrix the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE.
288
-#' @param threshold # the type of threshold to be used. The default "all" will distribute genes into pattern with the lowest ranking. The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern.
289
-#' @param lp a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used.
290
-#' @param full logical indicating whether to return the ranks of each gene for each pattern
291
-#'
292
-#' @return By default a non-overlapping list of genes associated with each \code{lp}. If \code{full=TRUE} a data.frame of
293
-#' genes rankings with a column for each \code{lp} will also be returned.
294
-#' @export
295
-#'
296
-#' @examples \dontrun{
297
-#' patternMarkers(Amatrix=AP$Amean,scaledPmatrix=FALSE,Pmatrix=NA,threshold="All",full=TRUE)
298
-#' }
299
-#'
300
-patternMarkers <- function(
301
-    Amatrix=NA, #A matrix of genes by weights resulting from CoGAPS or other NMF decomposition
302
-    scaledPmatrix=FALSE, # logical indicating whether the corresponding pattern matrix was fixed to have max 1 during decomposition
303
-    Pmatrix=NA, #the corresponding Pmatrix (patterns X samples) for the provided Amatrix (genes x patterns). This must be supplied if scaledPmatrix is FALSE.
304
-    threshold="all", # the type of threshold to be used. The default "All" will distribute genes into pattern with the highest ranking. 
305
-   # The "cut" thresholding by the first gene to have a lower ranking, i.e. better fit to, a pattern. 
306
-    lp=NA, # a vector of weights for each pattern to be used for finding markers. If NA markers for each pattern of the A matrix will be used.
307
-    full=FALSE #logical indicating whether to return the ranks of each gene for each pattern.
308
-){
309
-
310
-
311
-if(scaledPmatrix==FALSE){
312
-    if(!is.na(Pmatrix)){
313
-      pscale <- apply(Pmatrix,1,max)   # rescale p's to have max 1
314
-      Amatrix <- sweep(Amatrix, 2, pscale, FUN="*")   # rescale A in accordance with p's having max 1
315
-  }
316
-    else(warning("P values must be provided if not already scaled"))
317
-  }
318
-# find the A with the highest magnitude
319
-Arowmax <- t(apply(Amatrix, 1, function(x) x/max(x)))
320
-pmax<-apply(Amatrix, 1, max)
321
-# determine which genes are most associated with each pattern
322
-ssranks<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=dimnames(Amatrix))#list()
323
-ssgenes<-matrix(NA, nrow=nrow(Amatrix), ncol=ncol(Amatrix),dimnames=NULL)
324
-nP=dim(Amatrix)[2]
325
-if(!is.na(lp)){
326
-    if(length(lp)!=dim(Amatrix)[2]){
327
-        warning("lp length must equal the number of columns of the Amatrix")
328
-    }
329
-        sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
330
-        ssranks[order(sstat),i] <- 1:length(sstat)
331
-        ssgenes[,i]<-names(sort(sstat,decreasing=FALSE))
332
-} else {
333
-    for(i in 1:nP){
334
-        lp <- rep(0,dim(Amatrix)[2])
335
-        lp[i] <- 1
336
-        sstat <- apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp)))
337
-        ssranks[order(sstat),i] <- 1:length(sstat)
338
-        ssgenes[,i]<-names(sort(sstat,decreasing=FALSE))
339
-    }
340
-}
341
-if(threshold=="cut"){
342
-        geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
343
-        ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
344
-        #geneThresh <- apply(sweep(ssranks,1,t(apply(ssranks, 1, min)),"-"),2,function(x) which(x==0))
345
-        #ssgenes.th <- lapply(geneThresh,names)
346
-} else if(threshold=="all"){
347
-        pIndx<-apply(ssranks,1,which.min)
348
-        gBYp <- lapply(sort(unique(pIndx)),function(x) names(pIndx[pIndx==x]))
349
-        ssgenes.th <- lapply(1:nP, function(x) ssgenes[which(ssgenes[,x] %in% gBYp[[x]]),x])
350
-} else {stop("Threshold arguement not viable option")}
351
-
352
-if(full){return(list("PatternMarkers"=ssgenes.th,"PatternRanks"=ssranks))
353
-} else{return("PatternMarkers"=ssgenes.th)}
354
-}
355
-
356
-
357
-#' patternMatch4Parallel
358
-#'
359
-#' @param Ptot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
360
-#' @param nSets number of parallel sets used to generate \code{Ptot}
361
-#' @param cnt  number of branches at which to cut dendrogram
362
-#' @param minNS minimum of individual set contributions a cluster must contain
363
-#' @param cluster.method the agglomeration method to be used for clustering
364
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
365
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Ptot}
366
-#' @param ... additional parameters for \code{agnes}
367
-#'
368
-#' @return a matrix of concensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
369
-#' concensus pattern is also returned.
370
-#' @export
371
-#' @seealso \code{\link{agnes}}
372
-#'
373
-#'
374
-
375
-patternMatch4Parallel <- function(Ptot,
376
-  nSets, #number of sets
377
-  cnt, # number of branches at which to cut dendrogram
378
-  minNS, # minimum of sets a cluster must contain
379
-  cluster.method="complete",
380
-  ignore.NA=FALSE, # ignore NAs from potential over dimensionalization
381
-  bySet=FALSE,
382
-  ...){
383
-
384
-#### read in CoGAPS results
385
-cdir <- getwd()
386
-#if(!is.null(path)){setwd(path)}
387
-if(!is.null(minNS)){minNS=nSets/2}
388
-
389
-if(ignore.NA==FALSE){if(anyNA(Ptot)){warning('Non-sparse matrixes produced. Reducing the number of patterns asked for and rerun.')}}
390
-if(ignore.NA==TRUE){Ptot<-Ptot[complete.cases(Ptot),]}
391
-
392
-####################################################################
393
-# corr dist
394
-corr.dist=cor(t(Ptot))
395
-corr.dist=1-corr.dist
396
-# cluster
397
-#library(cluster)
398
-clust=agnes(x=corr.dist,diss=T,method=cluster.method)
399
-cut=cutree(as.hclust(clust),k=cnt)
400
-#save.image(file=paste("CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData"))
401
-####################################################################
402
-#drop n<4 and get weighted Avg
403
-cls=sort(unique(cut))
404
-cMNs=matrix(nrow=cnt,ncol=dim(Ptot)[2])
405
-rownames(cMNs)=cls
406
-colnames(cMNs)=colnames(Ptot)
407
-
408
-RtoMeanPattern <- list()
409
-PByClust <- list()
410
-for(i in cls){
411
-   if(is.null(dim(Ptot[cut == i, ]))==TRUE){
412
-       cMNs[i,] <- Ptot[cut == i, ]
413
-       RtoMeanPattern[[i]] <- rep(1,length(Ptot[cut == i, ]))
414
-       PByClust[[i]] <- t(as.matrix(Ptot[cut == i, ]))
415
-   }
416
-  else{
417
-  cMNs[i,]=colMeans(Ptot[cut==i,])
418
-  PByClust[[i]] <- Ptot[cut==i,]
419
-  nIN=sum(cut==i)
420
-  RtoMeanPattern[[i]] <- sapply(1:nIN,function(j) {round(cor(x=Ptot[cut==i,][j,],y=cMNs[i,]),3)})
421
-  }
422
-}
423
-
424
-#drop n<minNS 
425
-PByClustDrop <- list()
426
-RtoMPDrop <- list()
427
-for(i in cls){
428
-  if(is.null(dim(PByClust[[i]]))==TRUE){next}
429
-  if(dim(PByClust[[i]])[1]<minNS){next}
430
-  else{
431
-    #indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE)
432
-    PByClustDrop <- append(PByClustDrop,list(PByClust[[i]]))
433
-    RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]]))
434
-  }
435
-}
436
-
437
-
438
-### split by corr  (build in drop if below minNS)
439
-PByCDS <- list()
440
-RtoMPDS <- list()
441
-for(j in 1:length(PByClustDrop)){
442
-  if(is.null(dim(PByClustDrop[[j]]))==TRUE){
443
-      next
444
-      }
445
-  if(dim(PByClustDrop[[j]])[1]<minNS+nSets){
446
-    PByCDS <- append(PByCDS,PByClustDrop[j])
447
-    RtoMPDS <- append(RtoMPDS,RtoMPDrop[j])
448
-  }
449
-  if(dim(PByClustDrop[[j]])[1]>=minNS+nSets){
450
-    corr.distPBCD=cor(t(PByClustDrop[[j]]))
451
-    corr.distPBCD=1-corr.distPBCD
452
-    clustPBCD=agnes(x=corr.distPBCD,diss=T,method="complete")
453
-    cutPBCD=cutree(as.hclust(clustPBCD),k=2)
454
-    g1 <- PByClustDrop[[j]][cutPBCD==1,]
455
-    PByCDS <- append(PByCDS,list(g1))
456
-    RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g1)[1],function(z) round(cor(x=g1[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==1,])),3))))
457
-    g2 <- PByClustDrop[[j]][cutPBCD==2,]
458
-    if (is.null(dim(g2)[1])==FALSE){
459
-        PByCDS <- append(PByCDS,list(g2))
460
-        RtoMPDS <- append(RtoMPDS,list(sapply(1:dim(g2)[1],function(z) round(cor(x=g2[z,],y=colMeans(PByClustDrop[[j]][cutPBCD==2,])),3))))
461
-    }
462
-  }
463
-#print(j)
464
-#print(str(PByCDS))
465
-}
466
-
467
-#weighted.mean(PByClustDrop[[1]],RtoMPDrop[[1]])
468
-PByCDSWavg<- t(sapply(1:length(PByCDS),function(z) apply(PByCDS[[z]],2,function(x) weighted.mean(x,(RtoMPDS[[z]])^3))))
469
-rownames(PByCDSWavg) <- lapply(1:length(PByCDS),function(x) paste("Pattern",x))
470
-
471
-#save
472
-#save(PByCDSWavg,file=paste("PAatternSummary.UnScaled.",fname,".CoGAPS.",nP,"P.",nS,"Set.CorrClustCut",cnt,".RData",sep=""))
473
-
474
-#scale ps
475
-Pmax <- apply(PByCDSWavg,1,max)
476
-PByCDSWavgScaled <- t(sapply(1:dim(PByCDSWavg)[1],function(x) PByCDSWavg[x,]/Pmax[x]))
477
-rownames(PByCDSWavgScaled) <- rownames(PByCDSWavg)
478
-
479
-if(bySet){
480
-# return by set and final
481
-PBySet<-PByCDS
482
-return(list("consenusPatterns"=PByCDSWavgScaled,"PBySet"=PBySet))
483
-} else {return(PByCDSWavgScaled)}
484
-
485
-}
486
-
487
-
488
-####################################################################
489
-####################################################################
490
-
491
-
492
-#' PatternMatcher Shiny Ap
493
-#'
494
-#' @param PBySet list of matched set solutions for the Pmatrix from an NMF algorithm
495
-#' @param out optional name for saving output
496
-#' @param order optional vector indicating order of samples for plotting. Default is NULL.
497
-#' @param sample.color optional vector of colors of same length as colnames. Default is NULL.
498
-#'
499
-#' @return either an index of selected sets' contributions or the editted \code{PBySet} object
500
-#' @export
501
-#'
502
-#' @examples \dontrun{
503
-#' patternMatcher(PBySet,out,order,sample.color)
504
-#' }
505
-#'
506
-#'
507
-patternMatcher<-function(PBySet=NULL,out=NULL,order=NULL, sample.color=NULL) {
508
-
509
-runApp(list(
510
-  ui = pageWithSidebar(
511
-    # Application title
512
-    headerPanel('NMF Pattern Matching'),
513
-    # Side pannel with controls
514
-    sidebarPanel(
515
-      # to upload file
516
-      fileInput('file1',
517
-                'Choose .Rda File',
518
-                accept=c('.RData','.Rda','R data object','.rda')
519
-      ),
520