Browse code

Update documentation to Roxygen2

Shifts .Rd files in /man from being manually
created to being generated via Roxygen2. Data
is now documented in R/data.R and functions
are documented in the same files they are defined
in. Will make package more comparable to other
KechrisLab packages and easier to maintain

Max McGrath authored on 04/08/2021 22:21:24
Showing 16 changed files

... ...
@@ -1,3 +1,45 @@
1
+#' Create correlation coefficient vectors based on bivariate data
2
+#' 
3
+#' Calculates correlation coefficients based on two groups of -omics bivariate 
4
+#' data. Currently, only two groups of samples can be specified. Used to make 
5
+#' input for discordantRun().
6
+#' 
7
+#' @param x ExpressionSet of -omics data
8
+#' @param y Optional second ExpressionSet of -omics data, induces dual -omics 
9
+#' analysis
10
+#' @param groups n-length vector of 1s and 2s matching samples belonging to 
11
+#' groups 1 and 2
12
+#' @param cor.method Correlation method to measure association. Options are 
13
+#' "spearman", "pearson", "bwmc" and "sparcc"
14
+#' @details Creates vectors of correlation coefficients based on feature pairs 
15
+#' within x or between x and y. The names of the vectors are the feature pairs 
16
+#' taken from x and y.
17
+#' @return List of two named numeric vectors. Vectors give the correlation
18
+#' coefficients for groups 1 and 2 respectively, and vector names give
19
+#' the features of the respective feature pair separated by an underscore.
20
+#' @author Charlotte Siska \email{siska.charlotte@@gmail.com}
21
+#' @author Max McGrath \email{max.mcgrath@@ucdenver.edu}
22
+#' @examples 
23
+#' 
24
+#' ## load data
25
+#' data("TCGA_GBM_miRNA_microarray")
26
+#' data("TCGA_GBM_transcript_microarray")
27
+#' print(colnames(TCGA_GBM_transcript_microarray)) # look at groups
28
+#' groups <- c(rep(1,10), rep(2,20))
29
+#' # transcript-transcript pairs
30
+#' vectors <- createVectors(TCGA_GBM_transcript_microarray, 
31
+#'                          groups = groups, cor.method = c("pearson"))
32
+#' # miRNA-transcript pairs
33
+#' vectors <- createVectors(TCGA_GBM_transcript_microarray, 
34
+#'                          TCGA_GBM_miRNA_microarray, groups = groups)
35
+#'                          
36
+#' @references 
37
+#' Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for 
38
+#' Differential Correlation. (2015) Bioinformatics. 32(5): 690-696.
39
+#' 
40
+#' Friedman J and Alm EJ. Inferring Correlation Networks from Genomic Survey 
41
+#' Data. (2012) PLoS Computational Biology. 8:9, e1002687.
42
+#' 
1 43
 #' @import Biobase
2 44
 #' @import biwt
3 45
 #' @import gtools
4 46
new file mode 100644
... ...
@@ -0,0 +1,111 @@
1
+#' Example breast miRNA-Seq count dataset.
2
+#' 
3
+#' This dataset contains TMM normalized miRNA count values from
4
+#' miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The dataset 
5
+#' has 100 miRNA and 57 samples. The original dataset has 212 miRNA and 
6
+#' 57 samples.
7
+#' 
8
+#' @docType data
9
+#' @format An ExpressionSet with 100 features, 57 samples
10
+#' @keywords datasets
11
+#' @references National Institutes of Health. The Cancer Genome Atlas.
12
+#' @source \url{http://cancergenome.nih.gov/}
13
+#' @usage data(TCGA_Breast_miRNASeq)
14
+#' @examples 
15
+#'   data(TCGA_Breast_miRNASeq)
16
+"TCGA_Breast_miRNASeq"
17
+
18
+#' Example breast miRNA-Seq voom-transformed count dataset.
19
+#' 
20
+#' This dataset contains TMM normalized voom-transformed miRNA count values
21
+#' from miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The
22
+#' dataset has 100 miRNA and 57 samples. The original dataset has 212 miRNA and
23
+#' 57 samples.
24
+#'
25
+#' @docType data
26
+#' @format An ExpressionSet with 100 features and 57 samples
27
+#' @references 
28
+#' Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: 
29
+#' precision weights unlock linear model analysis tools for RNA-seq read 
30
+#' counts. 2014. Genome Biology, 15:R29.
31
+#'   
32
+#' National Institutes of Health. The Cancer Genome Atlas.
33
+#' @source \url{http://cancergenome.nih.gov/}
34
+#' @usage data(TCGA_Breast_miRNASeq_voom)
35
+#' @examples 
36
+#'   data(TCGA_Breast_miRNASeq_voom)
37
+"TCGA_Breast_miRNASeq_voom"
38
+
39
+#' TCGA Breast Cancer RNASeq Sample Dataset
40
+#' 
41
+#' This dataset contains TMM normalized RNA count values from RNASeq that was
42
+#' taken from the Cancer Genome Atlas, or TCGA. It has 100 features and 57
43
+#' samples. The original dataset had 17972 features and 57 samples.
44
+#'
45
+#' @docType data
46
+#' @format An ExpressionSet with 100 features and 57 samples
47
+#' @references 
48
+#' Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: 
49
+#' precision weights unlock linear model analysis tools for RNA-seq read 
50
+#' counts. 2014. Genome Biology, 15:R29.
51
+#'   
52
+#' National Institutes of Health. The Cancer Genome Atlas.
53
+#' @source \url{http://cancergenome.nih.gov/}
54
+#' @usage data(TCGA_Breast_RNASeq)
55
+#' @examples 
56
+#'   data(TCGA_Breast_RNASeq)
57
+"TCGA_Breast_RNASeq"
58
+
59
+#' TCGA Breast Cancer RNASeq Sample Dataset
60
+#' 
61
+#' This dataset contains TMM normalized voom-transformed RNA count values from
62
+#' RNASeq that was taken from the Cancer Genome Atlas, or TCGA.
63
+#'  
64
+#' @docType data
65
+#' @format  An ExpressionSet with 100 features and 57 samples
66
+#' @references 
67
+#' Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: precision 
68
+#' weights unlock linear model analysis tools for RNA-seq read counts. 2014. 
69
+#' Genome Biology, 15:R29.
70
+#' 
71
+#' National Institutes of Health. The Cancer Genome Atlas.
72
+#' @source \url{http://cancergenome.nih.gov/}
73
+#' @usage data(TCGA_Breast_RNASeq_voom)
74
+#' @examples
75
+#'   data(TCGA_Breast_RNASeq_voom)
76
+"TCGA_Breast_RNASeq_voom"
77
+
78
+#' TCGA Glioblastoma Multiforme miRNA Sample Dataset
79
+#' 
80
+#' This dataset contains miRNA expression values from a microarray that was
81
+#' taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30
82
+#' samples. The original dataset had 331 features and 30 samples.
83
+#' 
84
+#' @docType data
85
+#' @format An ExpressionSet with 10 features, 30 samples
86
+#' @keywords datasets
87
+#' @references National Institutes of Health. The Cancer Genome Atlas.
88
+#' @source \url{http://cancergenome.nih.gov/}
89
+#' @usage data(TCGA_GBM_miRNA_microarray)
90
+#' @examples 
91
+#'   data(TCGA_GBM_miRNA_microarray)
92
+"TCGA_GBM_miRNA_microarray"
93
+
94
+#' TCGA Glioblastoma Multiforme Transcript Sample Dataset
95
+#' 
96
+#' This dataset contains transcript expression values from a microarray that was
97
+#'  taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30 
98
+#'  samples. The original dataset had 72656 features and 30 samples.
99
+#' 
100
+#' @docType data
101
+#' @format An ExpressionSet with 10 features, 30 samples
102
+#' @keywords datasets
103
+#' @references National Institutes of Health. The Cancer Genome Atlas.
104
+#' @source \url{http://cancergenome.nih.gov/}
105
+#' @usage data(TCGA_GBM_transcript_microarray)
106
+#' @examples 
107
+#'   data(TCGA_GBM_transcript_microarray)
108
+"TCGA_GBM_transcript_microarray"
109
+
110
+
111
+
... ...
@@ -12,6 +12,88 @@
12 12
 # R function for SparCC, adapted from https://bitbucket.org/yonatanf/sparcc
13 13
 # Email: hyfang@pku.edu.cn
14 14
 
15
+#' Run Discordant Algorithm
16
+#' 
17
+#' Runs discordant algorithm on two vectors of correlation coefficients.
18
+#' 
19
+#' @param v1 Vector of correlation coefficients in group 1
20
+#' @param v2 Vector of correlation coefficients in group 2
21
+#' @param x ExpressionSet of -omics data
22
+#' @param y ExpressionSet of -omics data, induces dual -omics analysis
23
+#' @param transform If TRUE v1 and v2 will be Fisher transformed
24
+#' @param subsampling If TRUE subsampling will be run
25
+#' @param subSize Indicates how many feature pairs to be used for subsampling. 
26
+#' Default is the feature size in x
27
+#' @param iter Number of iterations for subsampling. Default is 100
28
+#' @param components Number of components in mixture model.
29
+#' 
30
+#' @return
31
+#' \describe{
32
+#'   \item{discordPPVector}{Vector of differentially correlated posterior 
33
+#'   probabilities.}
34
+#'   \item{discordPPMatrix}{Matrix of differentially correlated posterior 
35
+#'   probabilities where rows and columns reflect features}
36
+#'   \item{classVector}{Vector of classes that have the highest posterior 
37
+#'   probability}
38
+#'   \item{classMatrix}{Matrix of classes that have the highest posterior 
39
+#'   probability where rows and columns reflect features}
40
+#'   \item{probMatrix}{Matrix of posterior probabilities where rows are each 
41
+#'   molecular feature pair and columns are nine different classes}
42
+#'   \item{loglik}{Final log likelihood}
43
+#' }
44
+#' @details 
45
+#' The discordant algorithm is based on a Gaussian mixture model. If there are 
46
+#' three components, correlation coefficients are clustered into negative 
47
+#' correlations (-), positive correlations (+) and no correlation (0). If there 
48
+#' are five components, then there are two more classes for very negative 
49
+#' correlation (--) and very positive correlations (++). All possible 
50
+#' combinations for these components are made into classes. If there are three 
51
+#' components, there are 9 classes. If there are five components, there are 25 
52
+#' classes.
53
+#' 
54
+#' The posterior probabilities for each class are generated and outputted into 
55
+#' the value probMatrix. The value probMatrix is a matrix where each column is a
56
+#'  class and each row is a feature pair. The values discordPPVector and 
57
+#'  discordPPMatrix are the summed differential correlation posterior 
58
+#'  probability for each feature pair. The values classVector and classMatrix 
59
+#'  are the class with the highest posterior probability for each feature pair.
60
+#' @references 
61
+#' Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for 
62
+#' Differential Correlation (2015), Bioinformatics. 32 (5): 690-696.
63
+#' 
64
+#' Lai Y, Zhang F, Nayak TK, Modarres R, Lee NH and McCaffrey TA. Concordant 
65
+#' integrative gene set enrichment analysis of multiple large-scale two-sample 
66
+#' expression data sets. (2014) BMC Genomics 15, S6.
67
+#' 
68
+#' Lai Y, Adam B-l, Podolsky R, She J-X. A mixture model approach to the tests 
69
+#' of concordance and discordance between two large-scale experiments with two 
70
+#' sample groups. (2007) Bioinformatics 23, 1243-1250.
71
+#' 
72
+#' @author Charlotte Siska \email{siska.charlotte@@gmail.com}
73
+#' @author Max McGrath \email{max.mcgrath@@ucdenver.edu}
74
+#' 
75
+#' @examples
76
+#' # Load Data
77
+#' data(TCGA_GBM_miRNA_microarray)
78
+#' data(TCGA_GBM_transcript_microarray)
79
+#' print(colnames(TCGA_GBM_transcript_microarray)) # look at groups
80
+#' groups <- c(rep(1,10), rep(2,20))
81
+#' 
82
+#' ## DC analysis on only transcripts pairs
83
+#' 
84
+#' vectors <- createVectors(TCGA_GBM_transcript_microarray, 
85
+#'                          groups = groups)
86
+#' result <- discordantRun(vectors$v1, vectors$v2, 
87
+#'                         TCGA_GBM_transcript_microarray)
88
+#' 
89
+#' ## DC analysis on miRNA-transcript pairs
90
+#' 
91
+#' vectors <- createVectors(TCGA_GBM_transcript_microarray, 
92
+#'                          TCGA_GBM_miRNA_microarray, groups = groups, 
93
+#'                          cor.method = c("pearson"))
94
+#' result <- discordantRun(vectors$v1, vectors$v2, 
95
+#'                         TCGA_GBM_transcript_microarray, 
96
+#'                        TCGA_GBM_miRNA_microarray)
15 97
 #' @export
16 98
 discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, 
17 99
                           subsampling = FALSE, subSize = dim(x)[1], iter = 100, 
... ...
@@ -1,3 +1,41 @@
1
+#' Outliers using left and right MAD
2
+#' 
3
+#' Identify features with outliers using left and right median absolute 
4
+#' deviation (MAD).
5
+#' 
6
+#' @param mat m by n matrix of -omics data, where rows are features and columns 
7
+#' samples.
8
+#' @param filter0 Option to filter out features if they have at least one 0 
9
+#' value. Default is TRUE.
10
+#' @param threshold Threshold of how many MADs outside the left or right median 
11
+#' is used to determine features with outliers.
12
+#' @return
13
+#' \describe{
14
+#' \item{mat.filtered}{Input matrix where features with outliers filtered out.}
15
+#' \item{index}{Index of features that have no outliers.}
16
+#' }
17
+#' @details The purpose of this function is to determine outliers in 
18
+#' non-symmetric distributions. The distribution is split by the median. 
19
+#' Outliers are identified by being however many median absolute deviations (MAD)
20
+#' from either split distribution.
21
+#' 
22
+#' @references 
23
+#' Leys C, Klein O, Bernard P and Licata L. "Detecting Outliers: Do Not Use 
24
+#' Standard Deviation Around the Mean, Use Absolute Deviation Around the 
25
+#' Median." Journal of Experimental Social Psychology, 2013. 49(4), 764-766.
26
+#' 
27
+#' Magwene, PM, Willis JH, Kelly JK and Siepel A. "The Statistics of Bulk 
28
+#' Segregant Analysis Using Next Generation Sequencing." PLoS Computational 
29
+#' Biology, 2011. 7(11), e1002255.
30
+#' 
31
+#' 
32
+#' @examples 
33
+#' ## Load example -omics data.
34
+#' data(TCGA_Breast_miRNASeq)
35
+#' 
36
+#' ## Filter matrix based on outliers.
37
+#' mat.filtered <- splitMADOutlier(TCGA_Breast_miRNASeq)$mat.filtered
38
+#' 
1 39
 #' @export
2 40
 splitMADOutlier <- function(mat, filter0 = TRUE, threshold = 2) {
3 41
     if(mode(mat) != "S4") {
... ...
@@ -1,3 +1,26 @@
1
+#' Fisher Transformation of Pearson Correlation Coefficients to Z Scores
2
+#' 
3
+#' Transforms Pearson's correlation coefficients into z scores using Fisher's 
4
+#' method.
5
+#' 
6
+#' @param rho Integer or numeric vector of Pearson's correlation coefficients
7
+#' @return Returns Fisher-transformed correlation coefficients
8
+#' @references 
9
+#' Fisher, R.A. (1915). "Frequency distribution of the values of the correlation
10
+#' coefficient in samples of an indefinitely large population". Biometrika
11
+#' (Biometrika Trust) 10 (4).
12
+#' @details Fisher's transformation is when correlation coefficients are 
13
+#' transformed into a z score. These z scores have an approximately normal 
14
+#' distribution.
15
+#' @examples 
16
+#' ## Create a numeric vector of Pearson's correlation coefficients.
17
+#' 
18
+#' library(MASS)
19
+#' rhoV <- as.vector(cor(t(mvrnorm(10,rep(3,100),diag(100)))))
20
+#' 
21
+#' ## Determine Fisher-Transformed z scores of rho
22
+#' zV <- fishersTrans(rhoV)
23
+#' 
1 24
 #' @export
2 25
 fishersTrans <- function(rho) {
3 26
     r = (1 + rho) / (1 - rho)
... ...
@@ -1,22 +1,31 @@
1
-\name{TCGA_Breast_RNASeq}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_Breast_RNASeq}
3 5
 \alias{TCGA_Breast_RNASeq}
4 6
 \title{TCGA Breast Cancer RNASeq Sample Dataset}
7
+\format{
8
+An ExpressionSet with 100 features and 57 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_Breast_RNASeq)
15
+}
5 16
 \description{
6
-	This dataset contains TMM normalized RNA count values from RNASeq that was taken from the Cancer Genome Atlas, or TCGA. It has 100 features and 57 samples. The original dataset had 17972 features and 57 samples.
17
+This dataset contains TMM normalized RNA count values from RNASeq that was
18
+taken from the Cancer Genome Atlas, or TCGA. It has 100 features and 57
19
+samples. The original dataset had 17972 features and 57 samples.
7 20
 }
8
-
9
-\value{
10
-Breast RNA-Seq count data with 100 features and 57 samples.}
11
-
12
-\references{
13
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
21
+\examples{
22
+  data(TCGA_Breast_RNASeq)
14 23
 }
15
-
16
-\author{
17
-Charlotte Siska <siska.charlotte@gmail.com>
24
+\references{
25
+Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: 
26
+precision weights unlock linear model analysis tools for RNA-seq read 
27
+counts. 2014. Genome Biology, 15:R29.
28
+  
29
+National Institutes of Health. The Cancer Genome Atlas.
18 30
 }
19
-
20
-\usage{TCGA_Breast_RNASeq}
21
-\format{A matrix of RNA count values}
22 31
 \keyword{datasets}
... ...
@@ -1,23 +1,30 @@
1
-\name{TCGA_Breast_RNASeq_voom}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_Breast_RNASeq_voom}
3 5
 \alias{TCGA_Breast_RNASeq_voom}
4 6
 \title{TCGA Breast Cancer RNASeq Sample Dataset}
7
+\format{
8
+An ExpressionSet with 100 features and 57 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_Breast_RNASeq_voom)
15
+}
5 16
 \description{
6
-	This dataset contains TMM normalized voom-transformed RNA count values from RNASeq that was taken from the Cancer Genome Atlas, or TCGA.
17
+This dataset contains TMM normalized voom-transformed RNA count values from
18
+RNASeq that was taken from the Cancer Genome Atlas, or TCGA.
7 19
 }
8
-
9
-\value{
10
-Breast RNA-Seq voom-transformed count data with 100 features and 57 samples.}
11
-
12
-\references{
13
-Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: precision weights unlock linear model analysis tools for RNA-seq read counts. 2014. Genome Biology, 15:R29.
14
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
20
+\examples{
21
+  data(TCGA_Breast_RNASeq_voom)
15 22
 }
23
+\references{
24
+Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: precision 
25
+weights unlock linear model analysis tools for RNA-seq read counts. 2014. 
26
+Genome Biology, 15:R29.
16 27
 
17
-\author{
18
-Charlotte Siska <siska.charlotte@gmail.com>
28
+National Institutes of Health. The Cancer Genome Atlas.
19 29
 }
20
-
21
-\usage{TCGA_Breast_RNASeq_voom}
22
-\format{A matrix of RNA count values}
23 30
 \keyword{datasets}
... ...
@@ -1,22 +1,28 @@
1
-\name{TCGA_Breast_miRNASeq}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_Breast_miRNASeq}
3 5
 \alias{TCGA_Breast_miRNASeq}
4
-\title{TCGA Breast Cancer miRNASeq Sample Dataset}
6
+\title{Example breast miRNA-Seq count dataset.}
7
+\format{
8
+An ExpressionSet with 100 features, 57 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_Breast_miRNASeq)
15
+}
5 16
 \description{
6
-	This dataset contains TMM normalized miRNA count values from miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The dataset has 100 miRNA and 57 samples. The original dataset has 212 miRNA and 57 samples.
17
+This dataset contains TMM normalized miRNA count values from
18
+miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The dataset 
19
+has 100 miRNA and 57 samples. The original dataset has 212 miRNA and 
20
+57 samples.
7 21
 }
8
-
9
-\value{
10
-Breast miRNA-Seq count data with 100 features and 57 samples.}
11
-
12
-\references{
13
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
22
+\examples{
23
+  data(TCGA_Breast_miRNASeq)
14 24
 }
15
-\author{
16
-Charlotte Siska <siska.charlotte@gmail.com>
25
+\references{
26
+National Institutes of Health. The Cancer Genome Atlas.
17 27
 }
18
-
19
-
20
-\usage{TCGA_Breast_miRNASeq}
21
-\format{A matrix of miRNA count values}
22 28
 \keyword{datasets}
... ...
@@ -1,23 +1,32 @@
1
-\name{TCGA_Breast_miRNASeq_voom}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_Breast_miRNASeq_voom}
3 5
 \alias{TCGA_Breast_miRNASeq_voom}
4
-\title{TCGA Breast Cancer miRNASeq Sample Dataset}
6
+\title{Example breast miRNA-Seq voom-transformed count dataset.}
7
+\format{
8
+An ExpressionSet with 100 features and 57 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_Breast_miRNASeq_voom)
15
+}
5 16
 \description{
6
-	This dataset contains TMM normalized voom-transformed miRNA count values from miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The dataset has 100 miRNA and 57 samples. The original dataset has 212 miRNA and 57 samples.
17
+This dataset contains TMM normalized voom-transformed miRNA count values
18
+from miRNASeq that was taken from the Cancer Genome Atlas, or TCGA. The
19
+dataset has 100 miRNA and 57 samples. The original dataset has 212 miRNA and
20
+57 samples.
7 21
 }
8
-
9
-\value{
10
-Breast miRNA-Seq voom-transformed count data with 100 features and 57 samples.}
11
-
12
-\references{
13
-Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: precision weights unlock linear model analysis tools for RNA-seq read counts. 2014. Genome Biology, 15:R29.
14
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
22
+\examples{
23
+  data(TCGA_Breast_miRNASeq_voom)
15 24
 }
16
-
17
-\author{
18
-Charlotte Siska <siska.charlotte@gmail.com>
25
+\references{
26
+Charity W Law, Yunshun Chen, Wei Shi, Gordon K Smyth. voom: 
27
+precision weights unlock linear model analysis tools for RNA-seq read 
28
+counts. 2014. Genome Biology, 15:R29.
29
+  
30
+National Institutes of Health. The Cancer Genome Atlas.
19 31
 }
20
-
21
-\usage{TCGA_Breast_miRNASeq_voom}
22
-\format{A matrix of miRNA count values}
23 32
 \keyword{datasets}
... ...
@@ -1,22 +1,27 @@
1
-\name{TCGA_GBM_miRNA_microarray}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_GBM_miRNA_microarray}
3 5
 \alias{TCGA_GBM_miRNA_microarray}
4 6
 \title{TCGA Glioblastoma Multiforme miRNA Sample Dataset}
7
+\format{
8
+An ExpressionSet with 10 features, 30 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_GBM_miRNA_microarray)
15
+}
5 16
 \description{
6
-	This dataset contains miRNA expression values from a microarray that was taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30 samples. The original dataset had 331 features and 30 samples.
17
+This dataset contains miRNA expression values from a microarray that was
18
+taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30
19
+samples. The original dataset had 331 features and 30 samples.
7 20
 }
8
-
9
-\value{
10
-GBM miRNA microarray data with 10 features and 30 samples.}
11
-
12
-\references{
13
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
21
+\examples{
22
+  data(TCGA_GBM_miRNA_microarray)
14 23
 }
15
-
16
-\author{
17
-Charlotte Siska <siska.charlotte@gmail.com>
24
+\references{
25
+National Institutes of Health. The Cancer Genome Atlas.
18 26
 }
19
-
20
-\usage{TCGA_GBM_miRNASample}
21
-\format{A matrix of miRNA expression values}
22 27
 \keyword{datasets}
... ...
@@ -1,22 +1,27 @@
1
-\name{TCGA_GBM_transcript_microarray}
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/data.R
2 3
 \docType{data}
4
+\name{TCGA_GBM_transcript_microarray}
3 5
 \alias{TCGA_GBM_transcript_microarray}
4 6
 \title{TCGA Glioblastoma Multiforme Transcript Sample Dataset}
7
+\format{
8
+An ExpressionSet with 10 features, 30 samples
9
+}
10
+\source{
11
+\url{http://cancergenome.nih.gov/}
12
+}
13
+\usage{
14
+data(TCGA_GBM_transcript_microarray)
15
+}
5 16
 \description{
6
-	This dataset contains transcript expression values from a microarray that was taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30 samples. The original dataset had 72656 features and 30 samples.
17
+This dataset contains transcript expression values from a microarray that was
18
+ taken from the Cancer Genome Atlas, or TCGA. It has 10 features and 30 
19
+ samples. The original dataset had 72656 features and 30 samples.
7 20
 }
8
-
9
-\value{
10
-GBM transcript microarray data with 10 features and 30 samples.}
11
-
12
-\references{
13
-National Institues of Health. The Cancer Genome Atlas. http://cancergenome.nih.gov/
21
+\examples{
22
+  data(TCGA_GBM_transcript_microarray)
14 23
 }
15
-
16
-\author{
17
-Charlotte Siska <siska.charlotte@gmail.com>
24
+\references{
25
+National Institutes of Health. The Cancer Genome Atlas.
18 26
 }
19
-
20
-\usage{TCGA_GBM_transcript_microarray}
21
-\format{A matrix of transcript expression values}
22 27
 \keyword{datasets}
... ...
@@ -1,58 +1,67 @@
1
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/createVectors.R
2 3
 \name{createVectors}
3 4
 \alias{createVectors}
4
-\title{Create Pearson's correlation coefficient vectors based on bivariate data}
5
-
6
-\description{
7
-  Calculates correlation coefficients based on two groups of omics bivariate data. Currently, only two groups of samples can be specified. Used to make input for discordantRun().
8
-}
9
-
5
+\title{Create correlation coefficient vectors based on bivariate data}
10 6
 \usage{
11
-createVectors(x, y = NULL, groups, cor.method = c("spearman"))
7
+createVectors(
8
+  x,
9
+  y = NULL,
10
+  groups,
11
+  cor.method = c("spearman", "pearson", "bwmc", "sparcc")
12
+)
12 13
 }
13
-
14 14
 \arguments{
15
-  \item{x}{ExpressionSet of -omics data}
16
-  \item{y}{optional second ExpressionSet of -omics data, induces dual -omics analysis}
17
-  \item{groups}{n-length vector of 1s and 2s matching samples belonging to groups 1 and 2}
18
-  \item{cor.method}{correlation method to measure association. Options are "spearman", "pearson", "bwmc" and "sparcc"}
19
-}
15
+\item{x}{ExpressionSet of -omics data}
20 16
 
17
+\item{y}{Optional second ExpressionSet of -omics data, induces dual -omics 
18
+analysis}
19
+
20
+\item{groups}{n-length vector of 1s and 2s matching samples belonging to 
21
+groups 1 and 2}
22
+
23
+\item{cor.method}{Correlation method to measure association. Options are 
24
+"spearman", "pearson", "bwmc" and "sparcc"}
25
+}
21 26
 \value{
22
-  \item{v1}{List of correlation coefficients for group 1}
23
-  \item{v2}{List of correlation coefficients for group 2}
27
+List of two named numeric vectors. Vectors give the correlation
28
+coefficients for groups 1 and 2 respectively, and vector names give
29
+the features of the respective feature pair separated by an underscore.
24 30
 }
25
-
26
-\references{
27
-Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for Differential Correlation. (2015) Bioinformatics. 32(5): 690-696.
28
-Friedman J and Alm EJ. Inferring Correlation Networks from Genomic Survey Data. (2012) PLoS Computational Biology. 8:9, e1002687.
31
+\description{
32
+Calculates correlation coefficients based on two groups of -omics bivariate 
33
+data. Currently, only two groups of samples can be specified. Used to make 
34
+input for discordantRun().
29 35
 }
30
-
31 36
 \details{
32
-Creates vectors of correlation coefficents based on feature pairs within x or between x and y. The names of the vectors are the feature pairs taken from x and y.
37
+Creates vectors of correlation coefficients based on feature pairs 
38
+within x or between x and y. The names of the vectors are the feature pairs 
39
+taken from x and y.
33 40
 }
34
-
35
-\author{
36
-Charlotte Siska <siska.charlotte@gmail.com>
37
-}
38
-
39 41
 \examples{
40 42
 
41 43
 ## load data
42
-data("TCGA_GBM_miRNA_microarray") # loads matrix called TCGA_GBM_miRNA_microarray
43
-data("TCGA_GBM_transcript_microarray") # loads matrix called TCGA_GBM_transcript_microarray
44
+data("TCGA_GBM_miRNA_microarray")
45
+data("TCGA_GBM_transcript_microarray")
44 46
 print(colnames(TCGA_GBM_transcript_microarray)) # look at groups
45
-
46 47
 groups <- c(rep(1,10), rep(2,20))
47
-
48 48
 # transcript-transcript pairs
49
-
50
-vectors <- createVectors(TCGA_GBM_transcript_microarray, groups = groups, cor.method = c("pearson"))
51
-
49
+vectors <- createVectors(TCGA_GBM_transcript_microarray, 
50
+                         groups = groups, cor.method = c("pearson"))
52 51
 # miRNA-transcript pairs
52
+vectors <- createVectors(TCGA_GBM_transcript_microarray, 
53
+                         TCGA_GBM_miRNA_microarray, groups = groups)
54
+                         
55
+}
56
+\references{
57
+Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for 
58
+Differential Correlation. (2015) Bioinformatics. 32(5): 690-696.
53 59
 
54
-vectors <- createVectors(TCGA_GBM_transcript_microarray, TCGA_GBM_miRNA_microarray, groups = groups)
55
-
60
+Friedman J and Alm EJ. Inferring Correlation Networks from Genomic Survey 
61
+Data. (2012) PLoS Computational Biology. 8:9, e1002687.
56 62
 }
63
+\author{
64
+Charlotte Siska \email{siska.charlotte@gmail.com}
57 65
 
58
-\keyword{datagen}
66
+Max McGrath \email{max.mcgrath@ucdenver.edu}
67
+}
... ...
@@ -1,72 +1,113 @@
1
-% File src/library/discordant/man/discordantRun.Rd
2
-
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/discordant.R
3 3
 \name{discordantRun}
4 4
 \alias{discordantRun}
5 5
 \title{Run Discordant Algorithm}
6
-
7
-\description{
8
-   Runs discordant algorithm on two vectors of correlation coefficients.
9
-}
10
-
11 6
 \usage{
12
-discordantRun(v1, v2, x, y = NULL, transform = TRUE, subsampling = FALSE, subSize = dim(x)[1], iter = 100, components = 3)
7
+discordantRun(
8
+  v1,
9
+  v2,
10
+  x,
11
+  y = NULL,
12
+  transform = TRUE,
13
+  subsampling = FALSE,
14
+  subSize = dim(x)[1],
15
+  iter = 100,
16
+  components = 3
17
+)
13 18
 }
14
-
15 19
 \arguments{
16
-  \item{v1}{Vector of Pearson correlation coefficients in group 1}
17
-  \item{v2}{Vector of Pearson correlation coefficients in group 2}
18
-  \item{x}{ExpressionSet of -omics data}
19
-  \item{y}{ExpressionSet of -omics data, induces dual -omics analysis}
20
-  \item{transform}{If TRUE v1 and v2 will be Fisher transformed}
21
-  \item{subsampling}{If TRUE subsampling will be run}
22
-  \item{subSize}{Indicates how many feature pairs to be used for subsampling. Default is the feature size in x}
23
-  \item{iter}{Number of iterations for subsampling. Default is 100}
24
-  \item{components}{Number of components in mixture model.}
25
-}
20
+\item{v1}{Vector of correlation coefficients in group 1}
26 21
 
27
-\value{
28
-  \item{discordPPVector}{Vector of differentially correlated posterior probabilities.}
29
-  \item{discordPPMatrix}{Matrix of differentially correlated posterior probabilities where rows and columns reflect features}
30
-  \item{classVector}{Vector of classes that have the highest posterior probability}
31
-  \item{classMatrix}{Matrix of classes that have hte highest posterior probability where rows and columns reflect features}
32
-  \item{probMatrix}{Matrix of posterior probabilities where rows are each molecular feature pair and columns are nine different classes}
33
-  \item{loglik}{Final log likelihood}
34
-}
22
+\item{v2}{Vector of correlation coefficients in group 2}
35 23
 
36
-\details{
37
-The discordant algorithm is based on a Gaussian mixture model. If there are three components, correlation coefficients are clustered into negative correlations (-), positive correlations (+) and no correlation (0). If there are five components, then there are two more classes for very negative correlation (--) and very positive correlations (++). All possible combinations for these components are made into classes. If there are three components, there are 9 classes. If there are five components, there are 25 classes.
24
+\item{x}{ExpressionSet of -omics data}
38 25
 
39
-The posterior probabilities for each class are generated and outputted into the value probMatrix. The value probMatrix is a matrix where each column is a class and each row is a feature pair. The values discordPPVector and discordPPMatrix are the summed differential correlation posterior probability for each feature pair. The values classVector and classMatrix are the class with the highest posterior probability for each feature pair.
40
-}
26
+\item{y}{ExpressionSet of -omics data, induces dual -omics analysis}
41 27
 
42
-\references{
43
- Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for Differential Correlation (2015), Bioinformatics. 32 (5): 690-696.
44
-Lai Y, Zhang F, Nayak TK, Modarres R, Lee NH and McCaffrey TA. Concordant integrative gene set enrichment analysis of multiple large-scale two-sample expression data sets. (2014) BMC Genomics 15, S6.
45
-Lai Y, Adam B-l, Podolsky R, She J-X. A mixture model approach to the tests of concordance and discordancd between two large-scale experiments with two sample groups. (2007) Bioinformatics 23, 1243-1250.
46
-}
28
+\item{transform}{If TRUE v1 and v2 will be Fisher transformed}
47 29
 
48
-\author{
49
-Charlotte Siska <siska.charlotte@gmail.com>
50
-}
30
+\item{subsampling}{If TRUE subsampling will be run}
51 31
 
52
-\examples{
32
+\item{subSize}{Indicates how many feature pairs to be used for subsampling. 
33
+Default is the feature size in x}
53 34
 
54
-## load Data
35
+\item{iter}{Number of iterations for subsampling. Default is 100}
55 36
 
56
-data(TCGA_GBM_miRNA_microarray) # loads matrix called TCGA_GBM_miRNA_microarray
57
-data(TCGA_GBM_transcript_microarray) # loads matrix called TCGA_GBM_transcript_microarray
37
+\item{components}{Number of components in mixture model.}
38
+}
39
+\value{
40
+\describe{
41
+  \item{discordPPVector}{Vector of differentially correlated posterior 
42
+  probabilities.}
43
+  \item{discordPPMatrix}{Matrix of differentially correlated posterior 
44
+  probabilities where rows and columns reflect features}
45
+  \item{classVector}{Vector of classes that have the highest posterior 
46
+  probability}
47
+  \item{classMatrix}{Matrix of classes that have the highest posterior 
48
+  probability where rows and columns reflect features}
49
+  \item{probMatrix}{Matrix of posterior probabilities where rows are each 
50
+  molecular feature pair and columns are nine different classes}
51
+  \item{loglik}{Final log likelihood}
52
+}
53
+}
54
+\description{
55
+Runs discordant algorithm on two vectors of correlation coefficients.
56
+}
57
+\details{
58
+The discordant algorithm is based on a Gaussian mixture model. If there are 
59
+three components, correlation coefficients are clustered into negative 
60
+correlations (-), positive correlations (+) and no correlation (0). If there 
61
+are five components, then there are two more classes for very negative 
62
+correlation (--) and very positive correlations (++). All possible 
63
+combinations for these components are made into classes. If there are three 
64
+components, there are 9 classes. If there are five components, there are 25 
65
+classes.
66
+
67
+The posterior probabilities for each class are generated and outputted into 
68
+the value probMatrix. The value probMatrix is a matrix where each column is a
69
+ class and each row is a feature pair. The values discordPPVector and 
70
+ discordPPMatrix are the summed differential correlation posterior 
71
+ probability for each feature pair. The values classVector and classMatrix 
72
+ are the class with the highest posterior probability for each feature pair.
73
+}
74
+\examples{
75
+# Load Data
76
+data(TCGA_GBM_miRNA_microarray)
77
+data(TCGA_GBM_transcript_microarray)
58 78
 print(colnames(TCGA_GBM_transcript_microarray)) # look at groups
59 79
 groups <- c(rep(1,10), rep(2,20))
60 80
 
61 81
 ## DC analysis on only transcripts pairs
62 82
 
63
-vectors <- createVectors(TCGA_GBM_transcript_microarray, groups = groups)
64
-result <- discordantRun(vectors$v1, vectors$v2, TCGA_GBM_transcript_microarray)
83
+vectors <- createVectors(TCGA_GBM_transcript_microarray, 
84
+                         groups = groups)
85
+result <- discordantRun(vectors$v1, vectors$v2, 
86
+                        TCGA_GBM_transcript_microarray)
65 87
 
66 88
 ## DC analysis on miRNA-transcript pairs
67 89
 
68
-vectors <- createVectors(TCGA_GBM_transcript_microarray, TCGA_GBM_miRNA_microarray, groups = groups, cor.method = c("pearson"))
69
-result <- discordantRun(vectors$v1, vectors$v2, TCGA_GBM_transcript_microarray, TCGA_GBM_miRNA_microarray)
90
+vectors <- createVectors(TCGA_GBM_transcript_microarray, 
91
+                         TCGA_GBM_miRNA_microarray, groups = groups, 
92
+                         cor.method = c("pearson"))
93
+result <- discordantRun(vectors$v1, vectors$v2, 
94
+                        TCGA_GBM_transcript_microarray, 
95
+                       TCGA_GBM_miRNA_microarray)
96
+}
97
+\references{
98
+Siska C, Bowler R and Kechris K. The Discordant Method: A Novel Approach for 
99
+Differential Correlation (2015), Bioinformatics. 32 (5): 690-696.
100
+
101
+Lai Y, Zhang F, Nayak TK, Modarres R, Lee NH and McCaffrey TA. Concordant 
102
+integrative gene set enrichment analysis of multiple large-scale two-sample 
103
+expression data sets. (2014) BMC Genomics 15, S6.
104
+
105
+Lai Y, Adam B-l, Podolsky R, She J-X. A mixture model approach to the tests 
106
+of concordance and discordance between two large-scale experiments with two 
107
+sample groups. (2007) Bioinformatics 23, 1243-1250.
108
+}
109
+\author{
110
+Charlotte Siska \email{siska.charlotte@gmail.com}
70 111
 
112
+Max McGrath \email{max.mcgrath@ucdenver.edu}
71 113
 }
72
-\keyword{ model }
... ...
@@ -1,33 +1,27 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/utils.R
1 3
 \name{fishersTrans}
2 4
 \alias{fishersTrans}
3 5
 \title{Fisher Transformation of Pearson Correlation Coefficients to Z Scores}
4
-
5
-\description{
6
-   Transforms Pearsons correlation coefficients into z scores using Fishers method.
7
-}
8
-
9 6
 \usage{
10 7
 fishersTrans(rho)
11 8
 }
12
-
13 9
 \arguments{
14
-  \item{rho}{Integer or numeric vector of Pearson's correlation coefficients}
10
+\item{rho}{Integer or numeric vector of Pearson's correlation coefficients}
15 11
 }
16
-
17 12
 \value{
18 13
 Returns Fisher-transformed correlation coefficients
19 14
 }
20
-
21
-\references{
22
-Fisher, R.A. (1915). "Frequency distribution of the values of the correlation coefficient in samples of an indefinitely large population". Biometrika (Biometrika Trust) 10 (4).
15
+\description{
16
+Transforms Pearson's correlation coefficients into z scores using Fisher's 
17
+method.
23 18
 }
24
-
25 19
 \details{
26
-Fisher's transformation is when correlation coefficients are transformed into a z score. These z scores have an approximately normal distribution.
20
+Fisher's transformation is when correlation coefficients are 
21
+transformed into a z score. These z scores have an approximately normal 
22
+distribution.
27 23
 }
28
-
29 24
 \examples{
30
-
31 25
 ## Create integer or list of Pearson's correlation coefficients.
32 26
 
33 27
 library(MASS)
... ...
@@ -35,6 +29,10 @@ rhoV <- as.vector(cor(t(mvrnorm(10,rep(3,100),diag(100)))))
35 29
 
36 30
 ## Determine Fisher-Transformed z scores of rho
37 31
 zV <- fishersTrans(rhoV)
38
-}
39 32
 
40
-\keyword{methods}
33
+}
34
+\references{
35
+Fisher, R.A. (1915). "Frequency distribution of the values of the correlation
36
+coefficient in samples of an indefinitely large population". Biometrika
37
+(Biometrika Trust) 10 (4).
38
+}
41 39
deleted file mode 100644
... ...
@@ -1,42 +0,0 @@
1
-\name{splitMADOutlier}
2
-\alias{splitMADOutlier}
3
-\title{Outliers using left and right MAD}
4
-
5
-\description{
6
-  Identify features with outliers using left and right median absolute deviation (MAD).
7
-}
8
-
9
-\usage{
10
-splitMADOutlier(mat, filter0 = TRUE, threshold = 2)
11
-}
12
-
13
-\arguments{
14
-  \item{mat}{mxn matrix of -omics data, where rows are features and columns samples.}
15
-  \item{filter0}{Option to filter out features if they have at least one 0 value. Default is TRUE.}
16
-  \item{threshold}{Threshold of how many MADs outside the left or right median is used to determine features with outliers.}
17
-}
18
-
19
-\value{
20
-  \item{mat.filtered}{Input matrix where features with outliers filtered out.}
21
-  \item{index}{Index of features that have no outliers.}
22
-}
23
-
24
-\details{
25
-The purpose of this function is to determine outliers in non-symmetric distributions. The distribution is split by the median. Outliers are identifed by being however many median absolute deviations (MAD) from either split distribution.
26
-}
27
-
28
-\references{
29
-Leys C, Klein O, Bernard P and Licata L. "Detecting Outliers: Do Not Use Standard Deviation Around the Mean, Use Absolute Deivation Around the Median." Journal of Experimental Social Psychology, 2013. 49(4), 764-766.
30
-Magwene, PM, Willis JH, Kelly JK and Siepel A. "The Statistics of Bulk Segregant Analysis Using Next Generation Sequencing." PLoS Computational Biology, 2011. 7(11), e1002255.
31
-}
32
-
33
-\examples{
34
-
35
-## Simulate matrix of continuous -omics data.
36
-data(TCGA_Breast_miRNASeq)
37
-
38
-## Filter matrix based on outliers.
39
-mat.filtered <- splitMADOutlier(TCGA_Breast_miRNASeq)$mat.filtered
40
-}
41
-
42
-\keyword{methods}
43 0
new file mode 100644
... ...
@@ -0,0 +1,51 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/splitMADOutlier.R
3
+\name{splitMADOutlier}
4
+\alias{splitMADOutlier}
5
+\title{Outliers using left and right MAD}
6
+\usage{
7
+splitMADOutlier(mat, filter0 = TRUE, threshold = 2)
8
+}
9
+\arguments{
10
+\item{mat}{m by n matrix of -omics data, where rows are features and columns 
11
+samples.}
12
+
13
+\item{filter0}{Option to filter out features if they have at least one 0 
14
+value. Default is TRUE.}
15
+
16
+\item{threshold}{Threshold of how many MADs outside the left or right median 
17
+is used to determine features with outliers.}
18
+}
19
+\value{
20
+\describe{
21
+\item{mat.filtered}{Input matrix where features with outliers filtered out.}
22
+\item{index}{Index of features that have no outliers.}
23
+}
24
+}
25
+\description{
26
+Identify features with outliers using left and right median absolute 
27
+deviation (MAD).
28
+}
29
+\details{
30
+The purpose of this function is to determine outliers in 
31
+non-symmetric distributions. The distribution is split by the median. 
32
+Outliers are identified by being however many median absolute deviations (MAD)
33
+from either split distribution.
34
+}
35
+\examples{
36
+## Simulate matrix of continuous -omics data.
37
+data(TCGA_Breast_miRNASeq)
38
+
39
+## Filter matrix based on outliers.
40
+mat.filtered <- splitMADOutlier(TCGA_Breast_miRNASeq)$mat.filtered
41
+
42
+}
43
+\references{
44
+Leys C, Klein O, Bernard P and Licata L. "Detecting Outliers: Do Not Use 
45
+Standard Deviation Around the Mean, Use Absolute Deviation Around the 
46
+Median." Journal of Experimental Social Psychology, 2013. 49(4), 764-766.
47
+
48
+Magwene, PM, Willis JH, Kelly JK and Siepel A. "The Statistics of Bulk 
49
+Segregant Analysis Using Next Generation Sequencing." PLoS Computational 
50
+Biology, 2011. 7(11), e1002255.
51
+}