Browse code

Fully deprecate basePath in favor of experimentPath Work on SRSet - readInd (class definition) readInds(undefined function call) replaced by readIndex - readIndex, readCount initialized to integer(0) so new("SRSet") creates valid objects - experimentPath method to directly access sourcePath - add implied sampleNames,ExperimentPath-method for non-RochePath ExperimentPath Misc code tidy

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@33714 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 14/09/2008 01:26:21
Showing 12 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: ShortRead
2 2
 Type: Package
3 3
 Title: Base classes and methods for high-throughput short-read sequencing data.
4
-Version: 0.1.52
4
+Version: 0.1.53
5 5
 Author: Martin Morgan, Michael Lawrence, Simon Anders
6 6
 Maintainer: Biocore Team c/o BioC user list <bioconductor@stat.math.ethz.ch>
7 7
 Description: Base classes, functions, and methods for representation of
... ...
@@ -103,14 +103,14 @@ setClass("ExperimentPath", contains = c(".ShortReadBase"),
103 103
 setClass("SRSet", contains = ".ShortReadBase",
104 104
          representation = representation(
105 105
            sourcePath="ExperimentPath", # for lazy loading
106
-           readInds="integer", # for tracking subsets and sorting
107
-           readCounts="integer", # counts of reads in each sample
106
+           readIndex="integer", # for tracking subsets and sorting
107
+           readCount="integer", # counts of reads in each sample
108 108
            phenoData="AnnotatedDataFrame", # experimental design
109 109
            readData="AnnotatedDataFrame"), # arbitrary read annotations
110 110
          prototype = prototype(
111 111
            sourcePath=new("ExperimentPath"),
112
-           readInds=NA_integer_,
113
-           readCounts=NA_integer_,
112
+           readIndex=integer(0),
113
+           readCount=integer(0),
114 114
            phenoData=new("AnnotatedDataFrame"),
115 115
            readData=new("AnnotatedDataFrame")),
116 116
          validity = .srValidity)
... ...
@@ -101,6 +101,14 @@ setGeneric("alphabetScore", function(object, ...) {
101 101
 setGeneric("readAligned", function(dirPath, pattern=character(0), ...)
102 102
            standardGeneric("readAligned"), signature="dirPath")
103 103
 
104
+## ExperimentPath
105
+
106
+experimentPath <- function(object, ...) {
107
+    slot(object, "basePath")
108
+}
109
+
110
+setGeneric("experimentPath")
111
+
104 112
 ## *Set
105 113
 
106 114
 setGeneric("qa", function(dirPath, ...) standardGeneric("qa"))
... ...
@@ -25,19 +25,19 @@ setMethod(".srValidity", "ExperimentPath", function(object) {
25 25
     }
26 26
 }
27 27
 
28
-ExperimentPath <- function(experimentPath, ...) {
28
+ExperimentPath <- function(experimentPath=NA_character_, ...) {
29 29
     new("ExperimentPath", basePath=experimentPath, ...)
30 30
 }
31 31
 
32
-experimentPath <- basePath <- function(object, ...) {
33
-    slot(object, "basePath")
34
-}
35
-
36 32
 basePath <- function(object, ...) {
37 33
     .Deprecated("experimentPath")
38 34
     experimentPath(object, ...)
39 35
 }
40 36
 
37
+setMethod("sampleNames", "ExperimentPath", function(object) {
38
+    character(0)
39
+})
40
+
41 41
 .show_additionalPathSlots <- function(object) { # for derived classes
42 42
     catPath <- function(nm) {
43 43
         vals <- do.call(nm, list(object))
... ...
@@ -1,13 +1,13 @@
1
-RochePath <- function(basePath,
2
-                      readPath=.srPath(basePath, "^run"),
1
+RochePath <- function(experimentPath=NA_character_,
2
+                      readPath=.srPath(experimentPath, "^run"),
3 3
                       qualPath=readPath,
4 4
                       ..., verbose=FALSE) {
5 5
     if (verbose) {
6
-      .checkPath(basePath)
6
+      .checkPath(experimentPath)
7 7
       .checkPath(readPath)
8 8
       .checkPath(qualPath)
9 9
     }
10
-    new("RochePath", ..., basePath=basePath,
10
+    new("RochePath", ..., basePath=experimentPath,
11 11
         readPath=readPath, qualPath=qualPath)
12 12
 }
13 13
 
... ...
@@ -52,7 +52,11 @@ setMethod("readQual", "RochePath", .readQual_RochePath)
52 52
 setMethod("read454", "RochePath", .read454_RochePath)
53 53
 
54 54
 .sampleNames_RochePath <- function(object) {
55
-  sub("_.*", "", basename(.file_names(readPath(object), "\\.fna")))
55
+    path <- readPath(object)
56
+    if (!is.na(path))
57
+        sub("_.*", "", basename(.file_names(path, "\\.fna")))
58
+    else
59
+        callNextMethod()
56 60
 }
57 61
 
58 62
 setMethod("sampleNames", "RochePath", .sampleNames_RochePath)
... ...
@@ -1,62 +1,68 @@
1 1
 .SRSet_validity <- function(object) {
2
-  msg <- NULL
3
-  len <- length(readInd(object))
4
-  rlen <- c(readData = nrow(readData(object)))
5
-  if (!all(rlen==len)) {
6
-    bad <- rlen!=len
7
-    msg <- c(msg,
8
-             sprintf("read length mismatch: expected %d, found:\n  %s",
9
-                     rlen, paste(names(rlen)[bad], rlen[bad], sep="=",
10
-                                collapse=", ")))
11
-  }
12
-  snames <- sampleNames(experimentPath(object))
13
-  slen <- length(snames)
14
-  oslen <- c(phenoData = nrow(phenoData(object)),
15
-             readCount = length(readCount(object)))
16
-  if (!all(oslen==slen)) {
17
-    bad <- oslen!=slen
18
-    msg <- c(msg,
19
-             sprintf("sample length mismatch: expected %d, found:\n  %s",
20
-                     slen, paste(names(oslen)[bad], oslen[bad], sep="=",
21
-                                collapse=", ")))
22
-  }
23
-  osnames <- sampleNames(object)
24
-  stest <- snames == osnames
25
-  if (!all(stest))
26
-    msg <- c(msg,
27
-             sprintf("sample names mismatch:\n  %s",
28
-                     slen, paste(snames[!stest], osnames[!stest],
29
-                                 sep = "!=", collapse = ", ")))
30
-  rind <- readInd(object)
31
-  if (!all(rind > 0 & rind <= len))
32
-    msg <- c(msg, "values in 'readInd' must be > 0 and <= number of reads")
33
-  rcount <- readCount(object)
34
-  if (!all(rcount >= 0))
35
-    msg <- c(msg, "values in 'readCount' must be non-negative")
36
-  if (sum(rcount) != len)
37
-    msg <- c(msg,
38
-             sprintf("'sum(readCount)', %d, must equal the number of reads, %d",
39
-                     sum(rcount), len))
40
-  if (is.null(msg)) TRUE else msg
2
+    msg <- NULL
3
+    len <- length(readIndex(object))
4
+    rlen <- c(readData = nrow(readData(object)))
5
+    if (!all(rlen==len)) {
6
+        bad <- rlen!=len
7
+        msg <- c(msg,
8
+                 sprintf("read length mismatch: expected %d, found:\n  %s",
9
+                         rlen, paste(names(rlen)[bad], rlen[bad],
10
+                                     sep="=", collapse=", ")))
11
+    }
12
+    snames <- sampleNames(sourcePath(object))
13
+    slen <- length(snames)
14
+    oslen <- c(phenoData = nrow(phenoData(object)),
15
+               readCount = length(readCount(object)))
16
+    if (!all(oslen==slen)) {
17
+        bad <- oslen!=slen
18
+        msg <- c(msg,
19
+                 sprintf("sample length mismatch: expected %d, found:\n  %s",
20
+                         slen, paste(names(oslen)[bad], oslen[bad],
21
+                                     sep="=", collapse=", ")))
22
+    }
23
+    osnames <- sampleNames(object)
24
+    stest <- snames == osnames
25
+    if (!all(stest))
26
+        msg <- c(msg,
27
+                 sprintf("sample names mismatch:\n  %s",
28
+                         slen, paste(snames[!stest], osnames[!stest],
29
+                                     sep = "!=", collapse = ", ")))
30
+    rind <- readIndex(object)
31
+    if (!all(rind > 0 & rind <= len))
32
+        msg <- c(msg, "values in 'readIndex' must be > 0 and <= number of reads")
33
+    rcount <- readCount(object)
34
+    if (!all(rcount >= 0))
35
+        msg <- c(msg, "values in 'readCount' must be non-negative")
36
+    if (sum(rcount) != len)
37
+        msg <- c(msg,
38
+                 sprintf("'sum(readCount)', %d, must equal the number of reads, %d",
39
+                         sum(rcount), len))
40
+    if (is.null(msg)) TRUE else msg
41 41
 }
42 42
 
43 43
 setMethod(".srValidity", "SRSet", .SRSet_validity)
44 44
 
45
+.make_getter(c("readData", "sourcePath", "readIndex", "readCount"))
46
+
47
+setMethod("experimentPath", "SRSet", function(object, ...) {
48
+    callGeneric(sourcePath(object), ...)
49
+})
50
+
45 51
 setMethod("sampleNames", "SRSet", function(object) {
46
-  sampleNames(phenoData(object))
52
+    sampleNames(phenoData(object))
47 53
 })
48 54
           
49 55
 setMethod("show", "SRSet", function(object) {
50 56
     callNextMethod()
51
-    cat("basePath(sourcePath(object)):\n  ",
52
-        basePath(sourcePath(object)), "\n", sep="")
57
+    cat("experimentPath(object): ", experimentPath(object), "\n",
58
+        sep="")
53 59
 })
54 60
 
55 61
 setMethod("detail", "SRSet", function(object, ...) {
56 62
     callNextMethod()
57
-    cat("\n")
63
+    cat("\nsourcePath\n")
58 64
     detail(sourcePath(object), ...)
59
-    cat("\nclass: AnnotatedDataFrame\n")
65
+    cat("\nphenoData\n")
60 66
     pd <- phenoData(object)
61 67
     cat("pData:\n")
62 68
     print(pData(pd))
... ...
@@ -66,7 +72,6 @@ setMethod("detail", "SRSet", function(object, ...) {
66 72
 
67 73
 setMethod("phenoData", "SRSet", function(object) object@phenoData)
68 74
 
69
-.make_getter(c("readData", "sourcePath"))
70 75
 
71 76
 ## proposed
72 77
 ##setMethod("readSRQ", "SRSet", function(object) readSRQ(sourcePath(object)))
... ...
@@ -5,47 +5,26 @@ setMethod(".srValidity", "SolexaPath", function(object) {
5 5
     if (is.null(msg)) TRUE else msg
6 6
 })
7 7
 
8
-.solexaPath <- function(path, pattern) {
9
-    path <- path.expand(path)
10
-    tryCatch({
11
-        res <- list.files(path, pattern=pattern, full.name=TRUE)
12
-        if (length(res)==0) NA_character_
13
-        else res
14
-    }, warning=function(warn) NA_character_)
15
-}
16
-
17
-SolexaPath <- function(experimentPath,
18
-                       dataPath=.solexaPath(experimentPath, "Data"),
19
-                       scanPath=.solexaPath(dataPath, "GoldCrest"),
20
-                       imageAnalysisPath=.solexaPath(dataPath, "^C"),
21
-                       baseCallPath=.solexaPath(imageAnalysisPath,
8
+SolexaPath <- function(experimentPath=NA_character_,
9
+                       dataPath=.srPath(experimentPath, "Data"),
10
+                       scanPath=.srPath(dataPath, "GoldCrest"),
11
+                       imageAnalysisPath=.srPath(dataPath, "^C"),
12
+                       baseCallPath=.srPath(imageAnalysisPath,
22 13
                          "^Bustard"),
23
-                       analysisPath=.solexaPath(baseCallPath,
14
+                       analysisPath=.srPath(baseCallPath,
24 15
                          "^GERALD"),
25 16
                        ..., verbose=FALSE) {
26
-    checkPath <- function(path) {
27
-        nm <- deparse(substitute(path))
28
-        if (length(path)==0) {
29
-            warning(nm, " not defined")
30
-        } else {
31
-            for (p in path)
32
-                if (!file.exists(p)) 
33
-                    warning(nm, " '", p, "' does not exist")
34
-        }
35
-    }
36 17
     if (verbose) {
37
-        checkPath(experimentPath)
38
-        checkPath(dataPath)
39
-        checkPath(scanPath)
40
-        checkPath(imageAnalysisPath)
41
-        checkPath(baseCallPath)
42
-        checkPath(analysisPath)
18
+        .checkPath(experimentPath)
19
+        .checkPath(dataPath)
20
+        .checkPath(scanPath)
21
+        .checkPath(imageAnalysisPath)
22
+        .checkPath(baseCallPath)
23
+        .checkPath(analysisPath)
43 24
     }
44
-    new("SolexaPath", ...,
45
-        basePath=experimentPath,
46
-        dataPath=dataPath, scanPath=scanPath,
47
-        imageAnalysisPath=imageAnalysisPath, baseCallPath=baseCallPath,
48
-        analysisPath=analysisPath)
25
+    new("SolexaPath", ..., basePath=experimentPath, dataPath=dataPath,
26
+        scanPath=scanPath, imageAnalysisPath=imageAnalysisPath,
27
+        baseCallPath=baseCallPath, analysisPath=analysisPath)
49 28
 }
50 29
 
51 30
 .make_getter(slotNames("SolexaPath"))
... ...
@@ -39,7 +39,7 @@
39 39
   \describe{
40 40
     \item{\code{basePath}}{See above.}
41 41
   }
42
-  The slot is accessed with \code{basePath} or \code{experimentPath}.
42
+  The slot is accessed with \code{experimentPath}.
43 43
 }
44 44
 \section{Extends}{
45 45
   Class \code{"\linkS4class{.ShortReadBase}"}, directly.
... ...
@@ -28,6 +28,7 @@
28 28
 \alias{alphabetByCycle,FastqQuality-method}
29 29
 \alias{srsort,FastqQuality-method}
30 30
 \alias{srorder,FastqQuality-method}
31
+\alias{srrank,FastqQuality-method}
31 32
 \alias{srduplicated,FastqQuality-method}
32 33
 
33 34
 \title{Quality scores for short reads and their alignments}
... ...
@@ -127,11 +128,12 @@
127 128
 
128 129
     \item{srsort}{\code{signature(x = "FastqQuality")}:}
129 130
     \item{srorder}{\code{signature(x = "FastqQuality")}:}
131
+    \item{srrank}{\code{signature(x = "FastqQuality")}:}
130 132
     \item{srduplicated}{\code{signature(x = "FastqQuality")}:
131 133
     
132
-      Apply \code{\link{srsort}}, \code{srorder}, \code{srduplicated} to
133
-      quality scores, returning objects as described on the appropriate
134
-      help page.}
134
+      Apply \code{\link{srsort}}, \code{srorder}, \code{srrank}, and
135
+      \code{srduplicated} to quality scores, returning objects as
136
+      described on the appropriate help page.}
135 137
 
136 138
   }
137 139
 }
... ...
@@ -6,6 +6,7 @@
6 6
 % methods
7 7
 \alias{SolexaSet,SolexaPath-method}
8 8
 \alias{qa,SolexaPath-method}
9
+\alias{report,SolexaPath-method}
9 10
 \alias{show,SolexaPath-method}
10 11
 \alias{detail,SolexaPath-method}
11 12
 % transforming methods
... ...
@@ -121,13 +122,20 @@ Class \code{"\linkS4class{.ShortReadBase}"}, by class ".Solexa", distance 2.
121 122
       \code{signature(dirPath="SolexaPath", pattern="character(0)", run=1, ...)}:
122 123
 
123 124
       Use \code{analysisPath(dirPath)[[run]]} as the directorpy path and
124
-      \code{pattern="s_[1_8]_export.txt"} as the pattern for discovering solexa
125
+      \code{pattern="s_[1-8]_export.txt"} as the pattern for discovering solexa
125 126
       \code{export}-formatted fileds, returning a
126 127
       \code{\linkS4class{SolexaExportQA}} object summarizing quality
127 128
       assessment. If \code{Rmpi} has been initiated, quality assessment
128 129
       calculations are distributed across available nodes (one node per
129 130
       export file.)}
130 131
 
132
+    \item{report}{
133
+      \code{signature(x, ..., dest=tempfile(), type="pdf")}: Use
134
+      \code{qa(x, ...)} to generate quality assessment measures, and
135
+      use these to generate a quality assessment report at location
136
+      \code{dest} of type \code{type} (e.g., \sQuote{pdf}).
137
+    }
138
+
131 139
     \item{SolexaSet}{\code{signature(path = "SolexaPath")}: create a
132 140
       \code{\linkS4class{SolexaSet}} object based on \code{path}.}
133 141
 
... ...
@@ -14,7 +14,6 @@
14 14
 \alias{alignData}
15 15
 % Solexa
16 16
 \alias{experimentPath}
17
-\alias{basePath}
18 17
 \alias{dataPath}
19 18
 \alias{scanPath}
20 19
 \alias{imageAnalysisPath}
... ...
@@ -48,7 +47,6 @@ alignQuality(object, ...)
48 47
 alignData(object, ...)
49 48
 ## Solexa
50 49
 experimentPath(object, ...)
51
-basePath(object, ...)
52 50
 dataPath(object, ...)
53 51
 scanPath(object, ...)
54 52
 imageAnalysisPath(object, ...)
55 53
new file mode 100644
... ...
@@ -0,0 +1,28 @@
1
+\name{deprecated}
2
+
3
+\alias{deprecated}
4
+\alias{basePath}
5
+
6
+\title{Deprecated and defunct functions}
7
+
8
+\description{
9
+
10
+  These functions were introduced but are now deprecated or defunct.
11
+
12
+}
13
+\usage{
14
+
15
+basePath(object, ...)
16
+
17
+}
18
+
19
+\arguments{
20
+
21
+  \item{object}{For \code{basePath}, and object of class
22
+  \code{ExperimentPath}.}
23
+
24
+  \item{...}{Additional arguments.}
25
+
26
+}
27
+\author{Martin Morgan}
28
+\keyword{manip}