Browse code

moved to a SummarizedExperiment backend for class 'BSseq'

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@78143 bc3139a8-67e5-0310-9ffc-ced21a209358

khansen authored on 06/07/2013 21:26:40
Showing 30 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: bsseq
2
-Version: 0.9.3
2
+Version: 0.9.4
3 3
 Title: Analyze, manage and store bisulfite sequencing data
4 4
 Description: Tools for analyzing and visualizing bisulfite sequencing data
5 5
 Author: Kasper Daniel Hansen
... ...
@@ -8,8 +8,9 @@ Depends: R (>= 2.15), methods, BiocGenerics, IRanges, GenomicRanges,
8 8
         parallel, matrixStats
9 9
 Imports: scales, stats, graphics, Biobase, locfit
10 10
 Suggests: RUnit, bsseqData
11
-Collate: hasGRanges.R BSseq_class.R BSseq_utils.R utils.R read.bsmooth.R
12
-        read.bismark.R BSmooth.R dmrFinder.R gof_stats.R plotting.R fisher.R fixes.R
11
+Collate: hasGRanges.R BSseq_class.R BSseqTstat_class.R BSseq_utils.R combine.R 
12
+        utils.R read.bsmooth.R read.bismark.R BSmooth.R dmrFinder.R 
13
+        gof_stats.R plotting.R fisher.R fixes.R
13 14
 License: Artistic-2.0
14 15
 LazyData: yes
15 16
 biocViews: DNAMethylation
... ...
@@ -7,7 +7,7 @@ importFrom(BiocGenerics, "anyDuplicated", "cbind", "colnames",
7 7
            "combine", "density", "intersect", "lapply", "ncol",
8 8
            "nrow", "order", "paste", "pmax", "pmin", "rbind",
9 9
            "Reduce", "rep.int", "rownames", "sapply", "setdiff",
10
-           "strand", "strand<-", "union", "unique")
10
+           "strand", "strand<-", "union", "unique", "updateObject")
11 11
 
12 12
 importFrom(stats, "approxfun", "fisher.test", "ppoints",
13 13
            "predict", "preplot", "qchisq",
... ...
@@ -25,9 +25,8 @@ importFrom(scales, "alpha")
25 25
 importClassesFrom(Biobase, "AnnotatedDataFrame")
26 26
 importMethodsFrom(Biobase, "annotatedDataFrameFrom",
27 27
                   "pData", "pData<-",
28
-                  "phenoData", "phenoData<-",
29 28
                   "sampleNames", "sampleNames<-")
30
-
29
+importFrom(Biobase, "validMsg")
31 30
 
32 31
 ##
33 32
 ## Exporting
... ...
@@ -50,10 +49,9 @@ exportMethods("[", "show",
50 49
               "granges",
51 50
               "dim", "nrow", "ncol",
52 51
               "sampleNames", "sampleNames<-",
53
-              "phenoData", "phenoData<-",
54 52
               "pData", "pData<-",
55 53
               "findOverlaps", "subsetByOverlaps",
56
-              "combine")
54
+              "combine", "updateObject")
57 55
 
58 56
 export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats",
59 57
        "collapseBSseq", "orderBSseq", "hasBeenSmoothed", "chrSelectBSseq",
... ...
@@ -122,7 +122,11 @@ BSmooth <- function(BSseq, ns = 70, h = 1000, maxGap = 10^8, parallelBy = c("sam
122 122
         rownames(se.coef) <- NULL
123 123
         colnames(se.coef) <- sampleNames(BSseq)
124 124
     }
125
-    
125
+
126
+    if(!is.null(coef))
127
+        assay(BSseq, "coef") <- coef
128
+    if(!is.null(se.coef))
129
+        assay(BSseq, "se.coef") <- se.coef
126 130
     mytrans <- function(x) {
127 131
         y <- x
128 132
         ix <- which(x < 0)
... ...
@@ -133,8 +137,6 @@ BSmooth <- function(BSseq, ns = 70, h = 1000, maxGap = 10^8, parallelBy = c("sam
133 137
     }
134 138
     environment(mytrans) <- baseenv()
135 139
     BSseq@trans <- mytrans
136
-    BSseq@coef <- coef
137
-    BSseq@se.coef <- se.coef
138 140
     parameters <- list(smoothText = sprintf("BSmooth (ns = %d, h = %d, maxGap = %d)", ns, h, maxGap),
139 141
                        ns = ns, h = h, maxGap = maxGap)
140 142
     BSseq@parameters <- parameters
141 143
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+setClass("BSseqTstat", contains = "hasGRanges", 
2
+         representation(stats = "matrix",
3
+                        parameters = "list")
4
+         )
5
+setValidity("BSseqTstat", function(object) {
6
+    msg <- NULL
7
+    if(length(object@gr) != nrow(object@stats))
8
+        msg <- c(msg, "length of 'gr' is different from the number of rows of 'stats'")
9
+    if(is.null(msg)) TRUE else msg
10
+})
11
+
12
+setMethod("show", signature(object = "BSseqTstat"),
13
+          function(object) {
14
+              cat("An object of type 'BSseqTstat' with\n")
15
+              cat(" ", length(object), "methylation loci\n")
16
+              cat("based on smoothed data:\n")
17
+              cat(" ", object@parameters$smoothText, "\n")
18
+              cat("with parameters\n")
19
+              cat(" ", object@parameters$tstatText, "\n")
20
+          })
21
+
22
+setMethod("[", "BSseqTstat", function(x, i, ...) {
23
+    if(missing(i))
24
+        stop("need [i] for subsetting")
25
+    if(missing(i))
26
+        return(x)
27
+    x@gr <- x@gr[i]
28
+    x@stats <- x@stats[i,, drop = FALSE]
29
+    x
30
+})
31
+
32
+BSseqTstat <- function(gr = NULL, stats = NULL, parameters = NULL) {
33
+    out <- new("BSseqTstat")
34
+    out@gr <- gr
35
+    out@stats <- stats
36
+    out@parameters <- parameters
37
+    out
38
+}
39
+
... ...
@@ -1,69 +1,25 @@
1
-setClassUnion("matrixOrNULL", c("matrix", "NULL"))
2
-
3
-setClass("BSseq", contains = "hasGRanges", 
4
-         representation(M = "matrix",
5
-                        Cov = "matrix",
6
-                        coef = "matrixOrNULL",
7
-                        se.coef = "matrixOrNULL",
8
-                        trans = "function",
9
-                        parameters = "list",
10
-                        phenoData = "AnnotatedDataFrame"))
11
-
12
-
13
-setClass("BSseqTstat", contains = "hasGRanges", 
14
-         representation(stats = "matrix",
15
-                        parameters = "list")
16
-         )
17
-         
18
-
19
-##
20
-## Simple accessor and replacement functions
21
-##
1
+setClass("BSseq", contains = "SummarizedExperiment", 
2
+         representation(trans = "function",
3
+                        parameters = "list"))
22 4
 
23 5
 setValidity("BSseq", function(object) {
24
-    msg <- NULL
25
-    nCpGs <- length(object@gr)
26
-    nSamples <- ncol(object@M)
27
-    if(nCpGs != nrow(object@M) ||
28
-       nCpGs != nrow(object@Cov))
29
-        msg <- c(msg, "length of the 'gr' slot is not equal to the nrows the 'M' or 'Cov' slots")
30
-    if(nSamples != ncol(object@Cov))
31
-        msg <- c(msg, "ncols of the 'M' slot is not equal to the ncols of the 'Cov' slot")
32
-    if(nSamples != nrow(object@phenoData))
33
-        msg <- c(msg, "the 'phenoData' slot does not match the dimensions of the 'M' slot")
34
-    if(any(object@M < 0))
35
-        msg <- c(msg, "the 'M' slot has negative entries")
36
-    if(any(object@Cov < 0))
37
-        msg <- c(msg, "the 'Cov' slot has negative entries")
38
-    if(any(object@M > object@Cov))
39
-        msg <- c(msg, "the 'M' slot has at least one entry bigger than the 'Cov' slot")
40
-    if(!is.null(object@coef) && (nCpGs != nrow(object@coef) ||
41
-                                 nSamples != ncol(object@coef)))
42
-        msg <- c(msg, "nrows and/or ncols of the 'coef' slot does not have the same dimensions as the 'M' slot")
43
-    if(!is.null(object@se.coef) && (nCpGs != nrow(object@se.coef) ||
44
-                                    nSamples != ncol(object@se.coef)))
45
-        msg <- c(msg, "nrows and/or ncols of the 'se.coef' slot does not have the same dimensions as the 'M' slot")
46
-    if(!is.null(rownames(object@M)) ||
47
-       !is.null(rownames(object@Cov)) ||
48
-       !is.null(rownames(object@coef)) ||
49
-       !is.null(rownames(object@se.coef))) warning("unnecessary rownames in object")
50
-    ## FIXME: check samplenames
51
-    if(any(colnames(object@M) != rownames(pData(object))) ||
52
-       any(colnames(object@M) != rownames(object@Cov)))
53
-        msg <- c(msg, "sample names are messed up: colnames of the M slot has to equal colnames of the Cov slot which has to equal rownames of the phenoData slot")
54
-    if(is.null(msg)) TRUE else msg
55
-})
56
-
57
-
58
-
59
-setValidity("BSseqTstat", function(object) {
60
-    msg <- NULL
61
-    if(length(object@gr) != nrow(object@stats))
62
-        msg <- c(msg, "length of 'gr' is different from the number of rows of 'stats'")
63
-    if(is.null(msg)) TRUE else msg
6
+    msg <- validMsg(NULL, .checkAssayNames(object, c("Cov", "M")))
7
+    if(class(rowData(object)) != "GRanges")
8
+        msg <- validMsg(msg, sprintf("object of class '%s' needs to have a 'GRanges' in slot 'rowData'", class(object)))
9
+    if(any(assay(object, "M") < 0))
10
+        msg <- validMsg(msg, "the 'M' assay has negative entries")
11
+    if(any(assay(object, "Cov") < 0))
12
+        msg <- validMsg(msg, "the 'Cov' assay has negative entries")
13
+    if(any(assay(object, "M") > assay(object, "Cov")))
14
+        msg <- validMsg(msg, "the 'M' assay has at least one entry bigger than the 'Cov' assay")
15
+    if(!is.null(rownames(assay(object, "M"))) ||
16
+       !is.null(rownames(assay(object, "Cov"))) ||
17
+       ("coef" %in% names(assays(object)) && !is.null(rownames(assay(object, "coef")))) ||
18
+       ("se.coef" %in% names(assays(object)) && !is.null(rownames(assay(object, "se.coef")))))
19
+        warning("unnecessary rownames in object")
20
+    if (is.null(msg)) TRUE else msg
64 21
 })
65 22
 
66
-
67 23
 setMethod("show", signature(object = "BSseq"),
68 24
           function(object) {
69 25
               cat("An object of type 'BSseq' with\n")
... ...
@@ -77,251 +33,68 @@ setMethod("show", signature(object = "BSseq"),
77 33
               }
78 34
           })
79 35
 
80
-setMethod("show", signature(object = "BSseqTstat"),
81
-          function(object) {
82
-              cat("An object of type 'BSseqTstat' with\n")
83
-              cat(" ", length(object), "methylation loci\n")
84
-              cat("based on smoothed data:\n")
85
-              cat(" ", object@parameters$smoothText, "\n")
86
-              cat("with parameters\n")
87
-              cat(" ", object@parameters$tstatText, "\n")
88
-          })
89
-
90
-
91
-##
92
-## eSet stuff
93
-##
94
-
95
-setMethod("dim", "BSseq", function(x) {
96
-    dim(x@M)
97
-})
98
-setMethod("nrow", "BSseq", function(x) {
99
-    nrow(x@M)
100
-})
101
-setMethod("ncol", "BSseq", function(x) {
102
-    ncol(x@M)
36
+setMethod("pData", "BSseq", function(object) {
37
+    object@colData
103 38
 })
104 39
 
105
-setMethod("phenoData", "BSseq", function(object) {
106
-    object@phenoData
107
-})
108
-setReplaceMethod("phenoData", signature = signature(
109
-                              object = "BSseq",
110
-                              value = "AnnotatedDataFrame"),
40
+setReplaceMethod("pData",
41
+                 signature = signature(
42
+                      object = "BSseq",
43
+                      value = "data.frame"),
111 44
                  function(object, value) {
112
-                     object@phenoData <- value
45
+                     object@colData <- as(value, "DataFrame")
113 46
                      object
114 47
                  })
115 48
 
116
-setMethod("pData", "BSseq", function(object) {
117
-    pData(phenoData(object))
118
-})
119
-setReplaceMethod("pData", signature = signature(
120
-                              object = "BSseq",
121
-                              value = "data.frame"),
49
+setReplaceMethod("pData",
50
+                 signature = signature(
51
+                      object = "BSseq",
52
+                      value = "DataFrame"),
122 53
                  function(object, value) {
123
-                     pd <- object@phenoData
124
-                     pData(pd) <- value
125
-                     object@phenoData <- pd
54
+                     object@colData <- value
126 55
                      object
127 56
                  })
128 57
 
129 58
 setMethod("sampleNames", "BSseq", function(object) {
130
-    sampleNames(phenoData(object))
59
+    colnames(object)
131 60
 })
132 61
 
133 62
 setReplaceMethod("sampleNames",
134
-                 signature(object = "BSseq", value = "ANY"),
63
+                 signature = signature(
64
+                     object = "BSseq",
65
+                     value = "ANY"),
135 66
                  function(object, value) {
136
-                     sampleNames(phenoData(object)) <- value
137
-                     colnames(object@M) <- value
138
-                     colnames(object@Cov) <- value
139
-                     if(!is.null(object@coef))
140
-                         colnames(object@coef) <- value
141
-                     if(!is.null(object@se.coef))
142
-                         colnames(object@se.coef) <- value
67
+                     colnames(object) <- value
143 68
                      object
144 69
                  })
145 70
 
146
-hasBeenSmoothed <- function(BSseq) {
147
-    !is.null(BSseq@coef)
148
-}
149
-
150
-##
151
-## Subsetting and combining
152
-##
153
-
154
-setMethod("[", "BSseq", function(x, i, j, ...) {
155
-    if(missing(drop))
156
-        drop <- FALSE
157
-    if(missing(i) && missing(j))
158
-        stop("need [i,j] for subsetting")
159
-    if(!missing(j))
160
-        x@phenoData <- phenoData(x)[j,, ..., drop = drop]
161
-    else
162
-        x@phenoData <- phenoData(x)
163
-    if(missing(i)) {
164
-        x@M <- getBSseq(x, "M")[, j, drop = FALSE]
165
-        x@Cov <- getBSseq(x, "Cov")[, j, drop = FALSE]
166
-        x@coef <- getBSseq(x, "coef")[, j, drop = FALSE]
167
-        x@se.coef <- getBSseq(x, "se.coef")[, j, drop = FALSE]
168
-        x@gr <- granges(x)
169
-    } else {
170
-        x@M <- getBSseq(x, "M")[i, j, drop = FALSE]
171
-        x@Cov <- getBSseq(x, "Cov")[i, j, drop = FALSE]
172
-        x@coef <- getBSseq(x, "coef")[i, j, drop = FALSE]
173
-        x@se.coef <- getBSseq(x, "se.coef")[i, j, drop = FALSE]
174
-        x@gr <- granges(x)[i]
175
-    }
176
-    x
177
-})
178
-
179
-setMethod("[", "BSseqTstat", function(x, i, ...) {
180
-    if(missing(i))
181
-        stop("need [i] for subsetting")
182
-    if(missing(i))
183
-        return(x)
184
-    x@gr <- x@gr[i]
185
-    x@stats <- x@stats[i,, drop = FALSE]
186
-    x
187
-})
188
-
189
-setMethod("combine", signature(x = "BSseq", y = "BSseq"), function(x, y, ...) {
190
-    ## All of this assumes that we are place x and y "next" to each other,
191
-    ##  ie. we are not combining the same set of samples sequenced at different times
192
-    if (class(x) != class(y))
193
-        stop(paste("objects must be the same class, but are ",
194
-                   class(x), ", ", class(y), sep=""))
195
-    if(hasBeenSmoothed(x) && hasBeenSmoothed(y) && !all.equal(x@trans, y@trans))
196
-        stop("'x' and 'y' need to be smoothed on the same scale")
197
-    phenoData <- combine(phenoData(x), phenoData(y))
198
-    if(identical(granges(x), granges(y))) {
199
-        gr <- granges(x)
200
-        M <- cbind(getBSseq(x, "M"), getBSseq(y, "M"))
201
-        Cov <- cbind(getBSseq(x, "Cov"), getBSseq(y, "Cov"))
202
-        if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) {
203
-            coef <- NULL
204
-            se.coef <- NULL
205
-            trans <- NULL
206
-        } else {
207
-            coef <- cbind(getBSseq(x, "coef"), getBSseq(y, "coef"))
208
-            se.coef <- cbind(getBSseq(x, "se.coef"), getBSseq(y, "se.coef"))
209
-            trans <- getBSseq(x, "trans")
210
-        }
211
-    } else {
212
-        gr <- reduce(c(granges(x), granges(y)), min.gapwidth = 0L)
213
-        mm.x <- as.matrix(findOverlaps(gr, granges(x)))
214
-        mm.y <- as.matrix(findOverlaps(gr, granges(y)))
215
-        sampleNames <- c(sampleNames(x), sampleNames(y))
216
-        ## FIXME: there is no check that the two sampleNames are disjoint.
217
-        M <- Cov <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
218
-        colnames(M) <- colnames(Cov) <- sampleNames
219
-        M[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "M")[mm.x[,2],]
220
-        M[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "M")[mm.y[,2],]
221
-        Cov[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "Cov")[mm.x[,2],]
222
-        Cov[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "Cov")[mm.y[,2],]
223
-        if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) {
224
-            coef <- NULL
225
-            se.coef <- NULL
226
-            trans <- NULL
227
-        } else {
228
-            trans <- x@trans
229
-            coef <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
230
-            colnames(coef) <- sampleNames(phenoData)
231
-            if(hasBeenSmoothed(x))
232
-                coef[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "coef")[mm.x[,2],]
233
-            if(hasBeenSmoothed(y))
234
-                coef[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "coef")[mm.y[,2],]
235
-            if(is.null(getBSseq(x, "se.coef")) && is.null(getBSseq(x, "se.coef")))
236
-                se.coef <- NULL
237
-            else {
238
-                se.coef <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
239
-                colnames(se.coef) <- sampleNames(phenoData)
240
-                if(!is.null(getBSseq(x, "se.coef")))
241
-                    se.coef[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "se.coef")[mm.x[,2],]
242
-                if(!is.null(getBSseq(y, "se.coef")))
243
-                    se.coef[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "se.coef")[mm.y[,2],]
244
-            }
245
-        }
246
-    }
247
-    BSseq(gr = gr, M = M, Cov = Cov, coef = coef, se.coef = se.coef,
248
-          phenoData = phenoData, trans = trans, rmZeroCov = FALSE)
71
+setMethod("length", "BSseq", function(x) {
72
+    length(granges(x))
249 73
 })
250 74
 
251
-combineList <- function(x, ...) {
252
-    if(class(x) == "BSseq")
253
-        x <- list(x, ...)
254
-    stopifnot(all(sapply(x, class) == "BSseq"))
255
-    gr <- getBSseq(x[[1]], "gr")
256
-    trans <- getBSseq(x[[1]], "trans")
257
-    sameTrans <- sapply(x[-1], function(xx) {
258
-        identical(trans, getBSseq(xx, "trans"))
259
-    })
260
-    if(!all(sameTrans))
261
-        stop("all elements of '...' in combineList needs to have the same trans")
262
-    sameGr <- sapply(x[-1], function(xx) {
263
-        identical(trans, getBSseq(xx, "gr"))
264
-    })
265
-    if(all(sameGr)) {
266
-        M <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "M")))
267
-        Cov <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "Cov")))
268
-    } else {
269
-        gr <- sort(reduce(do.call(c, unname(lapply(x, granges))), min.gapwidth = 0L))
270
-        sampleNames <- do.call(c, lapply(x, sampleNames))
271
-        M <- matrix(0, ncol = length(sampleNames), nrow = length(gr))
272
-        colnames(M) <- sampleNames
273
-        Cov <- M
274
-        idxes <- split(seq(along = sampleNames), rep(seq(along = x), times = sapply(x, ncol)))
275
-        for(ii in seq(along = idxes)) {
276
-            ov <- findOverlaps(gr, granges(x[[ii]]))
277
-            idx <- idxes[[ii]]
278
-            M[queryHits(ov), idx] <- getBSseq(x[[ii]], "M")[subjectHits(ov),]
279
-            Cov[queryHits(ov), idx] <- getBSseq(x[[ii]], "Cov")[subjectHits(ov),]
280
-        }
281
-    }
282
-    if(any(!sapply(x, hasBeenSmoothed)) || !(all(sameGr))) {
283
-        coef <- NULL
284
-        se.coef <- NULL
285
-        trans <- NULL
286
-    } else {
287
-        coef <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "coef")))
288
-        se.coef <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "se.coef")))
289
-    }
290
-    phenoData <- Reduce(combine, lapply(x, phenoData))
291
-    BSseq(gr = gr, M = M, Cov = Cov, coef = coef, se.coef = se.coef,
292
-          phenoData = phenoData, trans = trans, rmZeroCov = FALSE)
75
+hasBeenSmoothed <- function(BSseq) {
76
+    "coef" %in% names(assays(BSseq))
293 77
 }
294 78
 
295 79
 getBSseq <- function(BSseq, type = c("Cov", "M", "gr", "coef", "se.coef", "trans", "parameters")) {
296 80
     type <- match.arg(type)
297
-    switch(type,
298
-           "Cov" = {
299
-               return(BSseq@Cov)
300
-           },
301
-           "M" = {
302
-               return(BSseq@M)
303
-           },
304
-           "gr" = {
305
-               return(BSseq@gr)
306
-           },
307
-           "coef" = {
308
-               return(BSseq@coef)
309
-           },
310
-           "se.coef" = {
311
-               return(BSseq@se.coef)
312
-           },
313
-           "trans" = {
314
-               return(BSseq@trans)
315
-           },
316
-           "parameters" = {
317
-               return(BSseq@parameters)
318
-           })
81
+    if(type %in% c("M", "Cov"))
82
+        return(assay(BSseq, type))
83
+    if(type %in% c("coef", "se.coef") && type %in% names(assays(BSseq)))
84
+        return(assay(BSseq, type))
85
+    if(type %in% c("coef", "se.coef"))
86
+       return(NULL)
87
+    if(type == "trans")
88
+        return(BSseq@trans)
89
+    if(type == "parameters")
90
+        return(BSseq@parameters)
91
+    if(type == "gr")
92
+        return(BSseq@rowData)
93
+    
319 94
 }
320 95
 
321
-
322
-
323 96
 BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
324
-                  trans = NULL, parameters = NULL, phenoData = NULL,
97
+                  trans = NULL, parameters = NULL, pData = NULL,
325 98
                   gr = NULL, pos = NULL, chr = NULL, sampleNames = NULL,
326 99
                   rmZeroCov = FALSE) {
327 100
     if(is.null(gr)) {
... ...
@@ -350,8 +123,10 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
350 123
     if(!is.null(names(gr)))
351 124
         names(gr) <- NULL
352 125
     ## deal with sampleNames
353
-    if(is.null(sampleNames) && !is.null(phenoData) && !is.null(sampleNames(phenoData)))
354
-        sampleNames <- sampleNames(phenoData)
126
+    if(!is(pData, "DataFrame"))
127
+        pData <- as(pData, "DataFrame")
128
+    if(is.null(sampleNames) && !is.null(pData) && !is.null(rownames(pData)))
129
+        sampleNames <- rownames(pData)
355 130
     if(is.null(sampleNames) && !is.null(colnames(M)))
356 131
         sampleNames <- colnames(M)
357 132
     if(is.null(sampleNames) && !is.null(colnames(Cov)))
... ...
@@ -401,31 +176,33 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
401 176
         colnames(M) <- sampleNames
402 177
     if(is.null(colnames(Cov)) || any(sampleNames != colnames(Cov)))
403 178
         colnames(Cov) <- sampleNames
404
-    if(is.null(phenoData))
405
-        phenoData <- annotatedDataFrameFrom(M, byrow = FALSE)
406
-    BSseq <- new("BSseq", gr = gr, M = M, Cov = Cov, phenoData = phenoData)
407 179
     if(!is.null(coef)) {
408 180
         if(!is.matrix(coef) ||
409
-           nrow(coef) != nrow(BSseq) ||
410
-           ncol(coef) != ncol(BSseq))
181
+           nrow(coef) != nrow(M) ||
182
+           ncol(coef) != ncol(M))
411 183
             stop("'coef' does not have the right dimensions")
412 184
         if(is.null(colnames(coef)) || any(sampleNames != colnames(coef)))
413 185
             colnames(coef) <- sampleNames
414 186
         if(!is.null(rownames(coef)))
415 187
             rownames(coef) <- NULL
416
-        BSseq@coef <- coef
417 188
     }
418 189
     if(!is.null(se.coef)) {
419 190
         if(!is.matrix(se.coef) ||
420
-           nrow(se.coef) != nrow(BSseq) ||
421
-           ncol(se.coef) != ncol(BSseq))
191
+           nrow(se.coef) != nrow(M) ||
192
+           ncol(se.coef) != ncol(M))
422 193
             stop("'se.coef' does not have the right dimensions")
423 194
         if(is.null(colnames(se.coef)) || any(sampleNames != colnames(se.coef)))
424 195
             colnames(se.coef) <- sampleNames
425 196
         if(!is.null(rownames(se.coef)))
426 197
             rownames(se.coef) <- NULL
427
-        BSseq@se.coef <- se.coef
428 198
     }
199
+    assays <- SimpleList(M = M, Cov = Cov, coef = coef, se.coef = se.coef)
200
+    assays <- assays[!sapply(assays, is.null)]
201
+    if(is.null(pData) || all(dim(pData) == c(0,0)))
202
+        BSseq <- SummarizedExperiment(assays = assays, rowData = gr)
203
+    else
204
+        BSseq <- SummarizedExperiment(assays = assays, rowData = gr, colData = pData)
205
+    BSseq <- as(BSseq, "BSseq")
429 206
     if(is.function(trans))
430 207
         BSseq@trans <- trans
431 208
     if(is.list(parameters))
... ...
@@ -434,13 +211,15 @@ BSseq <- function(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
434 211
 }
435 212
 
436 213
 
437
-BSseqTstat <- function(gr = NULL, stats = NULL, parameters = NULL) {
438
-    out <- new("BSseqTstat")
439
-    out@gr <- gr
440
-    out@stats <- stats
441
-    out@parameters <- parameters
442
-    out
443
-}
214
+setMethod("updateObject", "BSseq",
215
+          function(object, ...) {
216
+               if(!is(try(object@assays, silent = TRUE), "try-error"))
217
+                   return(object)
218
+               BSseq(gr = object@gr, M = object@M, Cov = object@Cov,
219
+                     coef = object@coef, se.coef = object@se.coef,
220
+                     trans = object@trans, parameters = object@parameters,
221
+                     pData = object@phenoData@data)
222
+           })
444 223
 
445 224
 
446 225
 
... ...
@@ -10,14 +10,14 @@ collapseBSseq <- function(BSseq, columns) {
10 10
         sp <- split(names(columns), columns)
11 11
     }
12 12
     M <- do.call(cbind, lapply(sp, function(ss) {
13
-        rowSums(BSseq@M[, ss, drop = FALSE])
13
+        rowSums(getBSseq(BSseq, "M")[, ss, drop = FALSE])
14 14
     }))
15 15
     colnames(M) <- names(sp)
16 16
     Cov <- do.call(cbind, lapply(sp, function(ss) {
17
-        rowSums(BSseq@Cov[, ss, drop = FALSE])
17
+        rowSums(getBSseq(BSseq, "M")[, ss, drop = FALSE])
18 18
     }))
19 19
     colnames(Cov) <- names(sp)
20
-    BSseq(gr = BSseq@gr, M = M, Cov = Cov)
20
+    BSseq(gr = getBSseq(BSseq, "gr"), M = M, Cov = Cov)
21 21
 }
22 22
 
23 23
 chrSelectBSseq <- function(BSseq, seqnames = NULL, order = FALSE) {
24 24
new file mode 100644
... ...
@@ -0,0 +1,106 @@
1
+setMethod("combine", signature(x = "BSseq", y = "BSseq"), function(x, y, ...) {
2
+    ## All of this assumes that we are place x and y "next" to each other,
3
+    ##  ie. we are not combining the same set of samples sequenced at different times
4
+    if (class(x) != class(y))
5
+        stop(paste("objects must be the same class, but are ",
6
+                   class(x), ", ", class(y), sep=""))
7
+    if(hasBeenSmoothed(x) && hasBeenSmoothed(y) && !all.equal(x@trans, y@trans))
8
+        stop("'x' and 'y' need to be smoothed on the same scale")
9
+    pData <- combine(as(pData(x), "data.frame"), as(pData(y), "data.frame"))
10
+    if(identical(granges(x), granges(y))) {
11
+        gr <- granges(x)
12
+        M <- cbind(getBSseq(x, "M"), getBSseq(y, "M"))
13
+        Cov <- cbind(getBSseq(x, "Cov"), getBSseq(y, "Cov"))
14
+        if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) {
15
+            coef <- NULL
16
+            se.coef <- NULL
17
+            trans <- NULL
18
+        } else {
19
+            coef <- cbind(getBSseq(x, "coef"), getBSseq(y, "coef"))
20
+            se.coef <- cbind(getBSseq(x, "se.coef"), getBSseq(y, "se.coef"))
21
+            trans <- getBSseq(x, "trans")
22
+        }
23
+    } else {
24
+        gr <- reduce(c(granges(x), granges(y)), min.gapwidth = 0L)
25
+        mm.x <- as.matrix(findOverlaps(gr, granges(x)))
26
+        mm.y <- as.matrix(findOverlaps(gr, granges(y)))
27
+        sampleNames <- c(sampleNames(x), sampleNames(y))
28
+        ## FIXME: there is no check that the two sampleNames are disjoint.
29
+        M <- Cov <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
30
+        colnames(M) <- colnames(Cov) <- sampleNames
31
+        M[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "M")[mm.x[,2],]
32
+        M[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "M")[mm.y[,2],]
33
+        Cov[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "Cov")[mm.x[,2],]
34
+        Cov[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "Cov")[mm.y[,2],]
35
+        if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) {
36
+            coef <- NULL
37
+            se.coef <- NULL
38
+            trans <- NULL
39
+        } else {
40
+            trans <- x@trans
41
+            coef <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
42
+            colnames(coef) <- rownames(pData)
43
+            if(hasBeenSmoothed(x))
44
+                coef[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "coef")[mm.x[,2],]
45
+            if(hasBeenSmoothed(y))
46
+                coef[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "coef")[mm.y[,2],]
47
+            if(is.null(getBSseq(x, "se.coef")) && is.null(getBSseq(x, "se.coef")))
48
+                se.coef <- NULL
49
+            else {
50
+                se.coef <- matrix(0, nrow = length(gr), ncol = length(sampleNames))
51
+                colnames(se.coef) <- sampleNames(pData)
52
+                if(!is.null(getBSseq(x, "se.coef")))
53
+                    se.coef[mm.x[,1], 1:ncol(x)] <- getBSseq(x, "se.coef")[mm.x[,2],]
54
+                if(!is.null(getBSseq(y, "se.coef")))
55
+                    se.coef[mm.y[,1], ncol(x) + 1:ncol(y)] <- getBSseq(y, "se.coef")[mm.y[,2],]
56
+            }
57
+        }
58
+    }
59
+    BSseq(gr = gr, M = M, Cov = Cov, coef = coef, se.coef = se.coef,
60
+          pData = pData, trans = trans, rmZeroCov = FALSE)
61
+})
62
+
63
+combineList <- function(x, ...) {
64
+    if(class(x) == "BSseq")
65
+        x <- list(x, ...)
66
+    stopifnot(all(sapply(x, class) == "BSseq"))
67
+    gr <- getBSseq(x[[1]], "gr")
68
+    trans <- getBSseq(x[[1]], "trans")
69
+    sameTrans <- sapply(x[-1], function(xx) {
70
+        identical(trans, getBSseq(xx, "trans"))
71
+    })
72
+    if(!all(sameTrans))
73
+        stop("all elements of '...' in combineList needs to have the same trans")
74
+    sameGr <- sapply(x[-1], function(xx) {
75
+        identical(trans, getBSseq(xx, "gr"))
76
+    })
77
+    if(all(sameGr)) {
78
+        M <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "M")))
79
+        Cov <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "Cov")))
80
+    } else {
81
+        gr <- sort(reduce(do.call(c, unname(lapply(x, granges))), min.gapwidth = 0L))
82
+        sampleNames <- do.call(c, lapply(x, sampleNames))
83
+        M <- matrix(0, ncol = length(sampleNames), nrow = length(gr))
84
+        colnames(M) <- sampleNames
85
+        Cov <- M
86
+        idxes <- split(seq(along = sampleNames), rep(seq(along = x), times = sapply(x, ncol)))
87
+        for(ii in seq(along = idxes)) {
88
+            ov <- findOverlaps(gr, granges(x[[ii]]))
89
+            idx <- idxes[[ii]]
90
+            M[queryHits(ov), idx] <- getBSseq(x[[ii]], "M")[subjectHits(ov),]
91
+            Cov[queryHits(ov), idx] <- getBSseq(x[[ii]], "Cov")[subjectHits(ov),]
92
+        }
93
+    }
94
+    if(any(!sapply(x, hasBeenSmoothed)) || !(all(sameGr))) {
95
+        coef <- NULL
96
+        se.coef <- NULL
97
+        trans <- NULL
98
+    } else {
99
+        coef <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "coef")))
100
+        se.coef <- do.call(cbind, lapply(x, function(xx) getBSseq(xx, "se.coef")))
101
+    }
102
+    pData <- Reduce(combine, lapply(x, pData))
103
+    BSseq(gr = gr, M = M, Cov = Cov, coef = coef, se.coef = se.coef,
104
+          pData = pData, trans = trans, rmZeroCov = FALSE)
105
+}
106
+
... ...
@@ -26,3 +26,11 @@ data.frame2GRanges <- function(df, keepColumns = FALSE, ignoreStrand = FALSE) {
26 26
     gr
27 27
 }
28 28
 
29
+.checkAssayNames <- function(object, names) {
30
+    nms <- names(assays(object, withDimnames = FALSE))
31
+    if(!all(names %in% nms))
32
+        return(sprintf("object of class '%s' needs to have assay slots with names '%s'",
33
+                       class(object), paste0(names, collapse = ", ")))
34
+    else
35
+        NULL
36
+}
29 37
Binary files a/data/BS.chr22.rda and b/data/BS.chr22.rda differ
... ...
@@ -10,6 +10,10 @@
10 10
     \item validObject(BSseq) has been extended to also check for
11 11
     sampleNames consistency.
12 12
     \item Fixed a bug related to validity checking.
13
+    \item The class representation for class 'BSseq' has changed
14
+    completely. The new class is build on 'SummarizedExperiment' from
15
+    GenomicRanges instead of 'hasGRanges'.  Use 'x <- updateObject(x)' to
16
+    update serialized (saved) objects.
13 17
   }
14 18
 }
15 19
 
... ...
@@ -13,7 +13,7 @@
13 13
 }
14 14
 \usage{data(BS.chr22)}
15 15
 \format{
16
-  An object of class \code{"BSseq"}.
16
+  An object of class \code{BSseq}.
17 17
 }
18 18
 \details{
19 19
   All coordinates are in hg18.
... ...
@@ -12,7 +12,7 @@ BSmooth(BSseq, ns = 70, h = 1000, maxGap = 10^8,
12 12
   mc.cores = 1, keep.se = FALSE, verbose = TRUE)
13 13
 }
14 14
 \arguments{
15
-  \item{BSseq}{An object of class \code{"BSseq"}.}
15
+  \item{BSseq}{An object of class \code{BSseq}.}
16 16
   \item{ns}{The minimum number of methylation loci in a smoothing window.}
17 17
   \item{h}{The minimum smoothing window, in bases.}
18 18
   \item{maxGap}{The maximum gap between two methylation loci, before the
... ...
@@ -43,7 +43,7 @@ BSmooth(BSseq, ns = 70, h = 1000, maxGap = 10^8,
43 43
 
44 44
 }
45 45
 \value{
46
-  An object of class \code{"BSseq"}, containing smoothed values and
46
+  An object of class \code{BSseq}, containing smoothed values and
47 47
   optionally standard errors for these.
48 48
 }
49 49
 \author{
... ...
@@ -59,6 +59,7 @@ BSmooth(BSseq, ns = 70, h = 1000, maxGap = 10^8,
59 59
 }
60 60
 \examples{
61 61
 \dontrun{
62
+data(BS.chr22)
62 63
 BS.fit <- BSmooth(BS.chr22, verbose = TRUE)
63 64
 BS.fit
64 65
 }
... ...
@@ -14,7 +14,7 @@ BSmooth.tstat(BSseq, group1, group2,
14 14
   maxGap = NULL, qSd = 0.75, k = 101, mc.cores = 1, verbose = TRUE)
15 15
 }
16 16
 \arguments{
17
-  \item{BSseq}{An object of class \code{"BSseq"}.}
17
+  \item{BSseq}{An object of class \code{BSseq}.}
18 18
   \item{group1}{A vector of sample names or indexes for the \sQuote{treatment}
19 19
     group.} 
20 20
   \item{group2}{A vector of sample names or indexes for the \sQuote{control}
... ...
@@ -48,7 +48,7 @@ BSmooth.tstat(BSseq, group1, group2,
48 48
   Additional details in the reference.
49 49
 }
50 50
 \value{
51
-  An object of class \code{"BSseqTstat"}.
51
+  An object of class \code{BSseqTstat}.
52 52
 }
53 53
 \author{
54 54
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
... ...
@@ -70,6 +70,7 @@ BSmooth.tstat(BSseq, group1, group2,
70 70
 if(require(bsseqData)) {
71 71
   data(keepLoci.ex)
72 72
   data(BS.cancer.ex.fit)
73
+  BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
73 74
   ## Remember to subset the BSseq object, see vignette for explanation
74 75
   BS.tstat <- BSmooth.tstat(BS.cancer.ex.fit[keepLoci.ex,],
75 76
                             group1 = c("C1", "C2", "C3"),
... ...
@@ -5,15 +5,13 @@
5 5
 \alias{[,BSseq-method}
6 6
 \alias{combine,BSseq,BSseq-method}
7 7
 \alias{combineList}
8
-\alias{dim,BSseq-method}
9
-\alias{ncol,BSseq-method}
10
-\alias{nrow,BSseq-method}
8
+\alias{length,BSseq-method}
11 9
 \alias{pData,BSseq-method}
12
-\alias{phenoData,BSseq-method}
13
-\alias{phenoData<-,BSseq,AnnotatedDataFrame-method}
14 10
 \alias{pData<-,BSseq,data.frame-method}
11
+\alias{pData<-,BSseq,DataFrame-method}
15 12
 \alias{sampleNames,BSseq-method}
16 13
 \alias{sampleNames<-,BSseq,ANY-method}
14
+\alias{updateObject,BSseq-method}
17 15
 \alias{show,BSseq-method}
18 16
 \alias{getBSseq}
19 17
 \alias{collapseBSseq}
... ...
@@ -31,30 +29,16 @@ An object from the class links together several pieces of information.
31 29
 (1) genomic locations stored as a \code{GRanges} object, a location by
32 30
 samples matrix of M values, a location by samples matrix of Cov
33 31
 (coverage) values and phenodata information.  In addition, there are
34
-slots for representing smoothed data.  Objects can be created by calls
35
-of the form \code{BSseq(...)}.   
32
+slots for representing smoothed data. This class is an extension of
33
+\code{SummarizedExperiment}.
36 34
 }
37 35
 \section{Slots}{
38 36
   \describe{
39
-    \item{\code{gr}:}{Object of class \code{"GRanges"} giving genomic
40
-      locations.}
41
-    \item{\code{M}:}{Object of class \code{"matrix"}.  This is a
42
-      location by sample matrix of the number of reads supporting
43
-      methylation.} 
44
-    \item{\code{Cov}:}{Object of class \code{"matrix"}.  This is a
45
-      location by sample matrix of the coverage.}
46
-    \item{\code{coef}:}{Object of class \code{"matrixOrNULL"}.  This is
47
-      an optional slot representing smoothed data.}
48
-    \item{\code{se.coef}:}{Object of class \code{"matrixOrNULL"}.  This
49
-      is an optional slot representing standard errors of the
50
-      smoothing.} 
51
-    \item{\code{trans}:}{Object of class \code{"function"}.  This
37
+    \item{\code{trans}:}{Object of class \code{function}.  This
52 38
       function transforms the \code{coef} slot from the scale the
53 39
       smoothing was done to the 0-1 methylation scale.}
54
-    \item{\code{parameters}:}{Object of class \code{"list"}.  A list of
40
+    \item{\code{parameters}:}{Object of class \code{list}.  A list of
55 41
       parameters representing for example how the data was smoothed.}
56
-    \item{\code{phenoData}:}{Object of class
57
-      \code{"AnnotatedDataFrame"}.  Sample information.}
58 42
   }
59 43
 }
60 44
 \section{Methods}{
... ...
@@ -62,17 +46,13 @@ of the form \code{BSseq(...)}.
62 46
     \item{[}{\code{signature(x = "BSseq")}: Subsetting by location
63 47
       (using integer indices) or sample (using integers or sample
64 48
       names).}
65
-    \item{dim}{The dimensions of the object (number of locations by
66
-      number of samples).}
67
-    \item{ncol}{The number of columns (equal to the number of samples).} 
68
-    \item{nrow}{The number of rows (equal to the number of genomic
69
-      locations).} 
49
+    \item{length}{Unlike for \code{SummarizedExperiment},
50
+      \code{length()} is the number of methylation loci (equal to
51
+      \code{length(granges(x))}).} 
70 52
     \item{sampleNames,sampleNames<-}{Sample names and its replacement
71
-      function for the object.}
72
-    \item{phenoData,phenoData<-}{Obtain and replace the \code{phenoData}
73
-      slot.}
53
+      function for the object.  This is an alias for \code{colnames}.}
74 54
     \item{pData,pData<-}{Obtain and replace the \code{pData} slot of the
75
-      \code{phenoData} slot.}
55
+      \code{phenoData} slot. This is an alias for \code{colData}.}
76 56
     \item{show}{The show method.}
77 57
     \item{combine}{This function combines two \code{BSSeq} objects.
78 58
       The genomic locations of the new object is the union of the
... ...
@@ -88,13 +68,13 @@ of the form \code{BSseq(...)}.
88 68
   \code{subsetByOverlaps}. 
89 69
   
90 70
   There are a number of almost methods-like functions for operating on
91
-  objects of class \code{"BSseq"}, including \code{getBSseq},
71
+  objects of class \code{BSseq}, including \code{getBSseq},
92 72
   \code{collapseBSseq}, and \code{orderBSseq}.  They are detailed below.
93 73
 
94 74
   \describe{
95 75
     \item{\code{collapseBSseq(BSseq, columns)}}{
96 76
     
97
-      is used to collapse an object of class \code{"BSseq"}.  By
77
+      is used to collapse an object of class \code{BSseq}.  By
98 78
       collapsing we simply mean that certain columns (samples) are merge
99 79
       together by summing up the methylation evidence and coverage.
100 80
       This is a useful function if you start by reading in a dataset
... ...
@@ -110,7 +90,7 @@ of the form \code{BSseq(...)}.
110 90
 
111 91
     \item{\code{orderBSseq(BSseq, seqOrder = NULL)}}{
112 92
 
113
-      simply orders an object of class \code{"BSseq"} according to
93
+      simply orders an object of class \code{BSseq} according to
114 94
       (increasing) genomic locations.  The \code{seqOrder} vector is a
115 95
       character vector of \code{seqnames(BSseq)} describing the order of
116 96
       the chromosomes.  This is useful for ordering \code{chr1} before
... ...
@@ -118,7 +98,7 @@ of the form \code{BSseq(...)}.
118 98
 
119 99
     \item{\code{chrSelectBSseq(BSseq, seqnames = NULL, order = FALSE)}}{
120 100
 
121
-      subsets and optionally reorders an object of class \code{"BSseq"}.
101
+      subsets and optionally reorders an object of class \code{BSseq}.
122 102
       The \code{seqnames} vector is a character vector of
123 103
       \code{seqnames(BSseq)} describing which chromosomes should be
124 104
       retained.  If \code{order} is \code{TRUE}, the chromosomes are
... ...
@@ -128,7 +108,7 @@ of the form \code{BSseq(...)}.
128 108
 	"se.coef", "trans",  "parameters"))}}{
129 109
 
130 110
       is a general accessor: is used to obtain a specific slot of an
131
-      object of class \code{"BSseq"}.  It is primarily intended for
111
+      object of class \code{BSseq}.  It is primarily intended for
132 112
       internal use in the package, for users we recommend \code{granges}
133 113
       to get the genomic locations, \code{getCoverage} to get the
134 114
       coverage slots and \code{getMeth} to get the smoothed values (if
... ...
@@ -148,17 +128,23 @@ of the form \code{BSseq(...)}.
148 128
       \code{Reduce(combine, list)}. }
149 129
   }
150 130
 }
151
-
131
+\section{Coercion}{
132
+  Package version 0.9.4 introduced a new version of representing
133
+  \sQuote{BSseq} objects.  You can update old serialized (saved)
134
+  objects by invoking \code{x <- udateObject(x)}.
135
+}
152 136
 \author{
153 137
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
154 138
 }
155 139
 \seealso{
140
+  
156 141
   The package vignette.  \code{\link{BSseq}} for the constructor
157
-  function.  \code{\linkS4class{hasGRanges}} for accessing the genomic
158
-  locations.  \code{\link{getBSseq}}, \code{\link{getCoverage}}, and
159
-  \code{\link{getMeth}} for accessing the data stored in the object and
160
-  finally \code{\link{BSmooth}} for smoothing the bisulfite sequence
161
-  data. 
142
+  function.  \code{\linkS4class{SummarizedExperiment}}
143
+  for the underlying class.  \code{\link{getBSseq}},
144
+  \code{\link{getCoverage}}, and \code{\link{getMeth}} for accessing the
145
+  data stored in the object and finally \code{\link{BSmooth}} for
146
+  smoothing the bisulfite sequence data.
147
+
162 148
 }
163 149
 \examples{
164 150
 M <- matrix(1:9, 3,3)
... ...
@@ -8,7 +8,7 @@
8 8
 }
9 9
 \usage{
10 10
 BSseq(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
11
-  trans = NULL, parameters = NULL, phenoData = NULL, gr = NULL,
11
+  trans = NULL, parameters = NULL, pData = NULL, gr = NULL,
12 12
   pos = NULL, chr = NULL, sampleNames = NULL, rmZeroCov = FALSE)
13 13
 }
14 14
 \arguments{
... ...
@@ -18,9 +18,9 @@ BSseq(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
18 18
   \item{se.coef}{Smoothing standard errors.}
19 19
   \item{trans}{A smoothing transformation.}
20 20
   \item{parameters}{A list of smoothing parameters.}
21
-  \item{phenoData}{An object of class \code{"phenoData"}.}
21
+  \item{pData}{An \code{data.frame} or \code{DataFrame}.}
22 22
   \item{sampleNames}{A vector of sample names.}
23
-  \item{gr}{An object of type \code{"GRanges"}.}
23
+  \item{gr}{An object of type \code{GRanges}.}
24 24
   \item{pos}{A vector of locations.}
25 25
   \item{chr}{A vector of chromosomes.}
26 26
   \item{rmZeroCov}{Should genomic locations with zero coverage in all
... ...
@@ -45,12 +45,12 @@ BSseq(M = NULL, Cov = NULL, coef = NULL, se.coef = NULL,
45 45
   are added by \code{BSmooth}).
46 46
 
47 47
   \code{phenoData} is a way to specify pheno data (as known from the
48
-  \code{"ExpressionSet"} and \code{"eSet"} classes), at a minimum
48
+  \code{ExpressionSet} and \code{eSet} classes), at a minimum
49 49
   \code{sampleNames} should be given (if they are not present, the
50 50
   function uses \code{col.names(M)}).  
51 51
 }
52 52
 \value{
53
-  An object of class \code{"BSseq"}.
53
+  An object of class \code{BSseq}.
54 54
 }
55 55
 \author{
56 56
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
... ...
@@ -27,14 +27,14 @@ not constructed by the user..
27 27
   \describe{
28 28
     \item{\code{stats}:}{This is a matrix with columns representing
29 29
       various statistics for methylation loci along the genome.}
30
-    \item{\code{parameters}:}{Object of class \code{"list"}.  A list of
30
+    \item{\code{parameters}:}{Object of class \code{list}.  A list of
31 31
       parameters representing how the t-statistics were computed.}
32
-    \item{\code{gr}:}{Object of class \code{"GRanges"} giving genomic
32
+    \item{\code{gr}:}{Object of class \code{GRanges} giving genomic
33 33
       locations.} 
34 34
   }
35 35
 }
36 36
 \section{Extends}{
37
-Class \code{"\linkS4class{hasGRanges}"}, directly.
37
+Class \code{\linkS4class{hasGRanges}}, directly.
38 38
 }
39 39
 \section{Methods}{
40 40
   \describe{
... ...
@@ -55,7 +55,7 @@ Class \code{"\linkS4class{hasGRanges}"}, directly.
55 55
 \seealso{
56 56
   The package vignette(s).  \code{\linkS4class{hasGRanges}} for accessing
57 57
   the genomic locations.  \code{\link{BSmooth.tstat}} for a function
58
-  that objects of class \code{"BSseqTstat"}, and \code{\link{dmrFinder}}
58
+  that objects of class \code{BSseqTstat}, and \code{\link{dmrFinder}}
59 59
   for a function that computes DMRs based on the t-statistics.  Also see
60 60
   \code{\link[bsseqData]{BS.cancer.ex.tstat}} for an example of the
61 61
   class in the \pkg{bsseqData} package.
... ...
@@ -26,7 +26,7 @@ data.frame2GRanges(df, keepColumns = FALSE, ignoreStrand = FALSE)
26 26
   \code{names} for the return object.
27 27
 }
28 28
 \value{
29
-  An object of class \code{"GRanges"}
29
+  An object of class \code{GRanges}
30 30
 }
31 31
 \author{
32 32
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
... ...
@@ -63,7 +63,7 @@ dmrFinder(BSseqTstat, cutoff = NULL, qcutoff = c(0.025, 0.975),
63 63
 }
64 64
 \seealso{
65 65
   \code{\link{BSmooth.tstat}} for the function constructing the input
66
-    object, and \code{"\linkS4class{BSseqTstat}"} for its class.  In the
66
+    object, and \code{\linkS4class{BSseqTstat}} for its class.  In the
67 67
     example below, we use \code{\link[bsseqData]{BS.cancer.ex.tstat}} as
68 68
     the actual input object.  Also see the package vignette(s) for a
69 69
     detailed example.  
... ...
@@ -4,14 +4,14 @@
4 4
   Compute Fisher-tests for a BSseq object
5 5
 }
6 6
 \description{
7
-  A function to compute Fisher-tests for an object of class \code{"BSseq"}.
7
+  A function to compute Fisher-tests for an object of class \code{BSseq}.
8 8
 }
9 9
 \usage{
10 10
 fisherTests(BSseq, group1, group2, lookup = NULL,
11 11
   returnLookup = TRUE, mc.cores = 1, verbose = TRUE)
12 12
 }
13 13
 \arguments{
14
-  \item{BSseq}{An object of class \code{"BSseq"}.}
14
+  \item{BSseq}{An object of class \code{BSseq}.}
15 15
   \item{group1}{A vector of sample names or indexes for the
16 16
     \sQuote{treatment} group.} 
17 17
   \item{group2}{A vector of sample names or indexes for the
... ...
@@ -11,9 +11,9 @@ getCoverage(BSseq, regions = NULL, type = c("Cov", "M"),
11 11
   what = c("perBase", "perRegionAverage", "perRegionTotal"))
12 12
 }
13 13
 \arguments{
14
-  \item{BSseq}{An object of class \code{"BSseq"}.}
15
-  \item{regions}{An optional \code{"data.frame"} or 
16
-    \code{"GenomicRanges"} object specifying a number of genomic regions.}
14
+  \item{BSseq}{An object of class \code{BSseq}.}
15
+  \item{regions}{An optional \code{data.frame} or 
16
+    \code{GenomicRanges} object specifying a number of genomic regions.}
17 17
   \item{type}{This returns either coverage or the total
18 18
     evidence for methylation at the loci.}
19 19
   \item{what}{The type of return object, see details.}
... ...
@@ -38,14 +38,15 @@ getCoverage(BSseq, regions = NULL, type = c("Cov", "M"),
38 38
   Kasper Daniel Hansen \email{khansen@jhsph.edu}.
39 39
 }
40 40
 \seealso{
41
-  \code{\linkS4class{BSseq}} for the \code{"BSseq"} class.
41
+  \code{\linkS4class{BSseq}} for the \code{BSseq} class.
42 42
 }
43 43
 \examples{
44
-  head(getCoverage(BS.chr22, type = "M"))
45
-  reg <- GRanges(seqnames = c("chr22", "chr22"),
46
-    ranges = IRanges(start = c(1, 2*10^7), end = c(2*10^7 +1, 4*10^7)))
47
-  getCoverage(BS.chr22, regions = reg, what = "perRegionAverage")
44
+data(BS.chr22)
45
+head(getCoverage(BS.chr22, type = "M"))
46
+reg <- GRanges(seqnames = c("chr22", "chr22"),
47
+  ranges = IRanges(start = c(1, 2*10^7), end = c(2*10^7 +1, 4*10^7)))
48
+getCoverage(BS.chr22, regions = reg, what = "perRegionAverage")
48 49
   cList <- getCoverage(BS.chr22, regions = reg)
49
-  length(cList)
50
-  head(cList[[1]])
50
+length(cList)
51
+head(cList[[1]])
51 52
 }
... ...
@@ -11,9 +11,9 @@ getMeth(BSseq, regions = NULL, type = c("smooth", "raw"),
11 11
   what = c("perBase", "perRegion"), confint = FALSE, alpha = 0.95)
12 12
 }
13 13
 \arguments{
14
-  \item{BSseq}{An object of class \code{"BSseq"}.}
15
-  \item{regions}{An optional \code{"data.frame"} or 
16
-    \code{"GenomicRanges"} object specifying a number of genomic regions.}
14
+  \item{BSseq}{An object of class \code{BSseq}.}
15
+  \item{regions}{An optional \code{data.frame} or 
16
+    \code{GenomicRanges} object specifying a number of genomic regions.}
17 17
   \item{type}{This returns either smoothed or raw estimates of the methylation level.}
18 18
   \item{what}{The type of return object, see details.}
19 19
   \item{confint}{Should a confidence interval be return for the
... ...
@@ -22,15 +22,15 @@ getMeth(BSseq, regions = NULL, type = c("smooth", "raw"),
22 22
   \item{alpha}{alpha value for the confidence interval.}
23 23
 }
24 24
 \note{
25
-  A \code{"BSseq"} object needs to be smoothed by the function
25
+  A \code{BSseq} object needs to be smoothed by the function
26 26
   \code{BSmooth} in order to support \code{type = "smooth"}.
27 27
 }
28 28
 \value{
29 29
   If \code{region = NULL} the \code{what} argument is ignored.  This is
30 30
   also the only situation in which \code{confint = TRUE} is supported.
31 31
   The return value is either a matrix (\code{confint = FALSE} or a list
32
-  with three components \code{confint = TRUE} (\code{"meth"},
33
-  \code{"upper"} and \code{"lower"}), giving the methylation estimates
32
+  with three components \code{confint = TRUE} (\code{meth},
33
+  \code{upper} and \code{lower}), giving the methylation estimates
34 34
   and (optionally) confidence intervals.
35 35
 
36 36
   Confidence intervals for \code{type = "smooth"} is based on standard
... ...
@@ -55,12 +55,13 @@ Statistician 52, 119-126.
55 55
   Kasper Daniel Hansen \email{khansen@jhsph.edu}.
56 56
 }
57 57
 \seealso{
58
-  \code{\linkS4class{BSseq}} for the \code{"BSseq"} class and
58
+  \code{\linkS4class{BSseq}} for the \code{BSseq} class and
59 59
   \code{\link{BSmooth}} for smoothing such an object.
60 60
 }
61 61
 \examples{
62
-  head(getMeth(BS.chr22, type = "raw"))
63
-  reg <- GRanges(seqnames = c("chr22", "chr22"),
64
-    ranges = IRanges(start = c(1, 2*10^7), end = c(2*10^7 +1, 4*10^7)))
65
-  head(getMeth(BS.chr22, regions = reg, type = "raw", what = "perBase"))
62
+data(BS.chr22)
63
+head(getMeth(BS.chr22, type = "raw"))
64
+reg <- GRanges(seqnames = c("chr22", "chr22"),
65
+  ranges = IRanges(start = c(1, 2*10^7), end = c(2*10^7 +1, 4*10^7)))
66
+head(getMeth(BS.chr22, regions = reg, type = "raw", what = "perBase"))
66 67
 }
... ...
@@ -4,7 +4,7 @@
4 4
   Obtain statistics from a BSseqTstat object
5 5
 }
6 6
 \description{
7
-  Essentially an accessor function for the statistics of a \code{"BSseqTstat"}
7
+  Essentially an accessor function for the statistics of a \code{BSseqTstat}
8 8
   object.
9 9
 }
10 10
 \usage{
... ...
@@ -12,9 +12,9 @@ getStats(BSseqTstat, regions = NULL,
12 12
          column = c("tstat.corrected", "tstat"))
13 13
 }
14 14
 \arguments{
15
-  \item{BSseqTstat}{An object of class \code{"BSseqTstat"}.}
16
-  \item{regions}{An optional \code{"data.frame"} or
17
-    \code{"GenomicRanges"} object specifying a number of genomic
15
+  \item{BSseqTstat}{An object of class \code{BSseqTstat}.}
16
+  \item{regions}{An optional \code{data.frame} or
17
+    \code{GenomicRanges} object specifying a number of genomic
18 18
     regions.} 
19 19
   \item{column}{Which t-statistic column should be obtained.}
20 20
 }
... ...
@@ -26,9 +26,9 @@ getStats(BSseqTstat, regions = NULL,
26 26
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
27 27
 }
28 28
 \seealso{
29
-  \code{\linkS4class{BSseqTstat}} for the \code{"BSseqTstat"} class, and
29
+  \code{\linkS4class{BSseqTstat}} for the \code{BSseqTstat} class, and
30 30
   \code{\link{getCoverage}} and \code{\link{getMeth}} for similar
31
-  functions, operating on objects of class \code{"BSseq"}.
31
+  functions, operating on objects of class \code{BSseq}.
32 32
 }
33 33
 \examples{
34 34
 if(require(bsseqData)) {
... ...
@@ -38,4 +38,4 @@ if(require(bsseqData)) {
38 38
      ranges = IRanges(start = c(1, 2*10^7), end = c(2*10^7 +1, 4*10^7)))
39 39
   head(getStats(BS.cancer.ex.tstat, regions = reg))
40 40
 }
41
-}
42 41
\ No newline at end of file
42
+}
... ...
@@ -20,7 +20,7 @@ binomialGoodnessOfFit(BSseq, method = c("MLE"), nQuantiles = 10^5)
20 20
   pch = 16, cex = 0.75, \ldots)
21 21
 }
22 22
 \arguments{
23
-  \item{BSseq}{An object of class \code{"BSseq"}.}
23
+  \item{BSseq}{An object of class \code{BSseq}.}
24 24
   \item{x}{A chisqGoodnessOfFit object (as produced by
25 25
     \code{poissonGoodnessOfFit} or \code{binomialGoodnessOfFit}).} 
26 26
   \item{nQuantiles}{The number of (evenly-spaced) quantiles stored in
... ...
@@ -36,7 +36,7 @@ binomialGoodnessOfFit(BSseq, method = c("MLE"), nQuantiles = 10^5)
36 36
 \details{
37 37
   
38 38
   These functions compute and plot goodness of fit statistics for
39
-  \code{"BSseq"} objects.  For each methylation loci, the Poisson
39
+  \code{BSseq} objects.  For each methylation loci, the Poisson
40 40
   goodness of fit statistic tests whether the coverage (at that loci) is
41 41
   independent and identically Poisson distributed across the samples.
42 42
   In a similar fashion, the Binomial goodness of fit statistic tests
... ...
@@ -49,7 +49,7 @@ binomialGoodnessOfFit(BSseq, method = c("MLE"), nQuantiles = 10^5)
49 49
 \value{
50 50
   The plotting method is invoked for its side effect.  Both
51 51
   \code{poissonGoodnessOfFit} and \code{binomialGoodnessOfFit} returns
52
-  an object of class \code{"chisqGoodnessOfFit"} which is a list with components
52
+  an object of class \code{chisqGoodnessOfFit} which is a list with components
53 53
  \item{chisq}{a vector of Chisq values.}
54 54
  \item{quantiles}{a vector of quantiles (of the chisq values).}
55 55
  \item{df}{degress of freedom}
... ...
@@ -62,6 +62,8 @@ binomialGoodnessOfFit(BSseq, method = c("MLE"), nQuantiles = 10^5)
62 62
 }
63 63
 \examples{
64 64
 if(require(bsseqData)) {
65
+  data(BS.cancer.ex)
66
+  BS.cancer.ex <- updateObject(BS.cancer.ex)
65 67
   gof <- poissonGoodnessOfFit(BS.cancer.ex)
66 68
   plot(gof)
67 69
 }
... ...
@@ -36,7 +36,7 @@ Objects can be created by calls of the form \code{new("hasGRanges", ...)}.
36 36
 }
37 37
 \section{Slots}{
38 38
   \describe{
39
-    \item{\code{gr}:}{Object of class \code{"GRanges"}.}
39
+    \item{\code{gr}:}{Object of class \code{GRanges}.}
40 40
   }
41 41
 }
42 42
 \section{Methods}{
... ...
@@ -23,12 +23,12 @@ plotManyRegions(BSseq, regions = NULL, extend = 0, main = "",
23 23
   verbose = TRUE)
24 24
 }
25 25
 \arguments{
26
-  \item{BSseq}{An object of class \code{"BSseq"}.}
27
-  \item{region}{A \code{"data.frame"} (with start, end and chr columns)
28
-    with 1 row or \code{"GRanges"} of length 1.  If \code{region} is
26
+  \item{BSseq}{An object of class \code{BSseq}.}
27
+  \item{region}{A \code{data.frame} (with start, end and chr columns)
28
+    with 1 row or \code{GRanges} of length 1.  If \code{region} is
29 29
     \code{NULL} the entire \code{BSseq} argument is plotted.}
30
-  \item{regions}{A \code{"data.frame"} (with start, end and chr columns)
31
-    or \code{"GRanges"}.}
30
+  \item{regions}{A \code{data.frame} (with start, end and chr columns)
31
+    or \code{GRanges}.}
32 32
   \item{extend}{Describes how much the plotting region should be
33 33
   extended in either direction.  The total width of the plot is equal to
34 34
   the width of the region plus twice \code{extend}.}
... ...
@@ -36,14 +36,14 @@ plotManyRegions(BSseq, regions = NULL, extend = 0, main = "",
36 36
   information about which genomic region is being plotted.}
37 37
   \item{addRegions}{A set of additional regions to be highlighted on the
38 38
   plots.  As the \code{regions} argument.}
39
-  \item{annoTrack}{A named list of \code{"GRanges"} objects.  Each
39
+  \item{annoTrack}{A named list of \code{GRanges} objects.  Each
40 40
   component is a track and the names of the list are the track names.
41 41
   Each track will be plotted as solid bars, and we routinely display
42 42
   information such as CpG islands, exons, etc.}
43 43
   \item{col}{The color of the methylation estimates, see details.}
44 44
   \item{lty}{The line type of the methylation estimates, see details.}
45 45
   \item{lwd}{The line width of the methylation estimates, see details.}
46
-  \item{BSseqTstat}{An object of class \code{"BSseqTstat"}.  If present,
46
+  \item{BSseqTstat}{An object of class \code{BSseqTstat}.  If present,
47 47
   a new panel will be shown with the t-statistics.}
48 48
   \item{mainWithWidth}{Should the default title include information
49 49
   about width of the plot region.}
... ...
@@ -48,7 +48,7 @@
48 48
   (\url{http://www.bioinformatics.babraham.ac.uk/projects/download.html#bismark}) 
49 49
 }
50 50
 \value{
51
-  An object of class \code{"BSseq"}.
51
+  An object of class \code{BSseq}.
52 52
 }
53 53
 
54 54
 \seealso{
... ...
@@ -40,8 +40,8 @@ read.bsmooth(dirs, sampleNames = NULL, seqnames = NULL,
40 40
   We are working on making this function faster and less memory hungry.
41 41
 }
42 42
 \value{
43
-  Either an object of class \code{"BSseq"} (if \code{returnRaw = FALSE})
44
-  or a list of \code{"GRanges"} which each component coming from a
43
+  Either an object of class \code{BSseq} (if \code{returnRaw = FALSE})
44
+  or a list of \code{GRanges} which each component coming from a
45 45
   directory. 
46 46
 }
47 47
 \seealso{
... ...
@@ -37,7 +37,7 @@ read.umtab2(dirs, sampleNames = NULL, rmZeroCov = FALSE,
37 37
 }
38 38
 \value{
39 39
   Both functions returns lists, the components are
40
-  \item{BSdata}{An object of class \code{"BSseq"} containing the
40
+  \item{BSdata}{An object of class \code{BSseq} containing the
41 41
     methylation evidence.}
42 42
   \item{GC}{A vector of local GC content values.}
43 43
   \item{Map}{A vector of local mapability values.}
... ...
@@ -127,6 +127,7 @@ We can have a look at some data from \cite{Lister:2009}, specifically chromosome
127 127
 cell line.
128 128
 
129 129
 <<BSchr22>>=
130
+data(BS.chr22)
130 131
 BS.chr22
131 132
 @ 
132 133
 
... ...
@@ -59,6 +59,8 @@ the current version of the BSmooth pipeline uses a slightly different alignment
59 59
 
60 60
 The following object contains the unsmoothed ``raw'' summarized alignment data.
61 61
 <<showData>>=
62
+data(BS.cancer.ex)
63
+BS.cancer.ex <- updateObject(BS.cancer.ex)
62 64
 BS.cancer.ex
63 65
 pData(BS.cancer.ex)
64 66
 @ 
... ...
@@ -83,6 +85,7 @@ support on MS Windows due to a limitation of the operating system.
83 85
 For ease of use, the \Rpackage{bsseqData} includes the result of this command:
84 86
 <<showDataFit>>=
85 87
 data(BS.cancer.ex.fit)
88
+BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
86 89
 BS.cancer.ex.fit
87 90
 @ 
88 91