Browse code

crlmm returns a SnpSet object

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38275 bc3139a8-67e5-0310-9ffc-ced21a209358

Benilton Carvalho authored on 28/03/2009 04:43:33
Showing 13 changed files

... ...
@@ -49,9 +49,12 @@ BioC data packages
49 49
 2009-03-25 Rob Scharpf - committed version 1.0.61
50 50
 * update to copynumber vignette
51 51
 
52
+2009-03-27 B Carvalho - committed version 1.0.62
52 53
 
54
+* crlmm() returns a SnpSet object (from Biobase)
53 55
 
56
+* fixed standard vignette to accommodate this change
54 57
 
58
+* redefined calls/confs to accommodate this change
55 59
 
56
-
57
-
60
+* fixed documentation to acommodate this change
... ...
@@ -1,7 +1,7 @@
1 1
 Package: crlmm
2 2
 Type: Package
3 3
 Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays.
4
-Version: 1.0.61
4
+Version: 1.0.62
5 5
 Date: 2008-12-28
6 6
 Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>
7 7
 Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu>
... ...
@@ -10,7 +10,7 @@ License: Artistic-2.0
10 10
 Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase, mvtnorm
11 11
 Suggests: hapmapsnp5, genomewidesnp5Crlmm, genomewidesnp6Crlmm (>= 1.0.1), methods, GGdata, snpMatrix
12 12
 Collate: AllClasses.R
13
-         crlmm-methods.R
13
+         methods-SnpSet.R
14 14
 	 cnrma-functions.R
15 15
          crlmm-functions.R
16 16
          snprma-functions.R
... ...
@@ -1,7 +1,4 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2
-export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates")
3
-exportMethods("list2crlmmSet", "calls", "confs")
4
-exportClasses("crlmmSet")
5 2
 import(Biobase)
6 3
 importFrom(affyio, read.celfile.header, read.celfile)
7 4
 importFrom(preprocessCore, normalize.quantiles.use.target)
... ...
@@ -10,3 +7,5 @@ importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd)
10 7
 importFrom(genefilter, rowSds)
11 8
 importFrom(mvtnorm, dmvnorm)
12 9
 
10
+export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates")
11
+exportMethods("calls", "confs")
... ...
@@ -189,7 +189,8 @@ instantiateObjects <- function(calls, NP, plate, envir, chrom, A, B,
189 189
 			       gender, SNRmin=5, SNR){
190 190
 	envir[["chrom"]] <- chrom
191 191
 	CHR_INDEX <- paste(chrom, "index", sep="")
192
-	data(list=CHR_INDEX, package="genomewidesnp6Crlmm")	
192
+	data(list=CHR_INDEX, package="genomewidesnp6Crlmm")
193
+        ## Rob, where does 'index[[1]]' come from?
193 194
 	A <- A[index[[1]], SNR > SNRmin]
194 195
 	B <- B[index[[1]], SNR > SNRmin]
195 196
 	calls <- calls[index[[1]], SNR > SNRmin]
... ...
@@ -43,7 +43,7 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE,
43 43
 
44 44
   res2[["SNR"]] <- res[["SNR"]]
45 45
   res2[["SKW"]] <- res[["SKW"]]
46
-  return(res2)
46
+  return(list2SnpSet(res2, returnParams=returnParams))
47 47
 }
48 48
 
49 49
 crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
... ...
@@ -122,9 +122,8 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
122 122
                                 newparams[["N"]][, 3]) > recallMin &
123 123
                                 !apply(regionInfo, 1, any)),
124 124
                                 autosomeIndex)
125
-  
126 125
   if(length(Index) < recallRegMin){
127
-    warning("Recallibration not possible.")
126
+    warning("Recalibration not possible.")
128 127
     newparams <- params
129 128
     dev <- vector("numeric", nrow(newparams[["centers"]]))
130 129
     SS <- matrix(Inf, 3, 3)
... ...
@@ -221,9 +220,9 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL,
221 220
   
222 221
   if(verbose) message("Done.")
223 222
   if (returnParams){
224
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS))
223
+    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
225 224
   }else{
226
-    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS))
225
+    return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS, gender=gender, pkgname=pkgname))
227 226
   }
228 227
 }
229 228
 
230 229
deleted file mode 100644
... ...
@@ -1,27 +0,0 @@
1
-## Methods for crlmm
2
-
3
-setGeneric("calls", function(x) standardGeneric("calls"))
4
-setMethod("calls", "crlmmSet", function(x) assayData(x)$calls)
5
-
6
-setGeneric("confs", function(x) standardGeneric("confs"))
7
-setMethod("confs", "crlmmSet", function(x) assayData(x)$confs)
8
-
9
-setGeneric("list2crlmmSet", function(x) standardGeneric("list2crlmmSet"))
10
-setMethod("list2crlmmSet", "list",
11
-          function(x){
12
-            pd <- data.frame(SNR=x[["SNR"]],
13
-                             row.names=colnames(x[["calls"]]))
14
-            pdv <- data.frame(labelDescription=c("Signal-to-noise Ratio"),
15
-                              row.names=c("SNR"))
16
-            fd <- data.frame(SNPQC=x[["SNPQC"]],
17
-                             row.names=rownames(x[["calls"]]))
18
-            fdv <- data.frame(labelDescription=c("SNP Quality Score"),
19
-                              row.names=c("SNPQC"))
20
-            new("crlmmSet",
21
-                assayData=assayDataNew("lockedEnvironment",
22
-                  calls=x[["calls"]], confs=x[["confs"]]),
23
-                phenoData=new("AnnotatedDataFrame",
24
-                  data=pd, varMetadata=pdv),
25
-                featureData=new("AnnotatedDataFrame",
26
-                  data=fd, varMetadata=fdv))
27
-          })
28 0
new file mode 100644
... ...
@@ -0,0 +1,7 @@
1
+## Methods for crlmm
2
+
3
+setGeneric("calls", function(x) standardGeneric("calls"))
4
+setMethod("calls", "SnpSet", function(x) assayData(x)$call)
5
+
6
+setGeneric("confs", function(x) standardGeneric("confs"))
7
+setMethod("confs", "SnpSet", function(x) assayData(x)$callProbability)
... ...
@@ -42,3 +42,95 @@ getVarInEnv <- function(dataset, environ=.crlmmPkgEnv){
42 42
 		stop("Variable ", dataset, " not found in .crlmmPkgEnv")
43 43
 	environ[[dataset]]
44 44
 }
45
+
46
+list2SnpSet <- function(x, returnParams=FALSE){
47
+  pd <- data.frame(SNR=x[["SNR"]], gender=x[["gender"]],
48
+                   row.names=colnames(x[["calls"]]))
49
+  pdv <- data.frame(labelDescription=c("Signal-to-noise Ratio",
50
+                      "Gender: Male (1) and Female (2)"),
51
+                    row.names=c("SNR", "gender"))
52
+  recall <- length(x[["DD"]]) > 1
53
+  if (returnParams){
54
+    if (recall){
55
+      fd <- data.frame(SNPQC=x[["SNPQC"]],
56
+                       cAA=x[["params"]][["centers"]][,1],
57
+                       cAB=x[["params"]][["centers"]][,2],
58
+                       cBB=x[["params"]][["centers"]][,3],
59
+                       sAA=x[["params"]][["scales"]][,1],
60
+                       sAB=x[["params"]][["scales"]][,2],
61
+                       sBB=x[["params"]][["scales"]][,3],
62
+                       nAA=x[["params"]][["N"]][,1],
63
+                       nAB=x[["params"]][["N"]][,2],
64
+                       nBB=x[["params"]][["N"]][,3],
65
+                       spAA=x[["DD"]][,1],
66
+                       spAB=x[["DD"]][,2],
67
+                       spBB=x[["DD"]][,3],
68
+                       row.names=rownames(x[["calls"]]))
69
+      fdv <- data.frame(labelDescription=c(
70
+                          "SNP Quality Score",
71
+                          "Center AA", "Center AB", "Center BB",
72
+                          "Scale AA", "Scale AB", "Scale BB",
73
+                          "N AA", "N AB", "N BB",
74
+                          "Shift in parameters AA",
75
+                          "Shift in parameters AB",
76
+                          "Shift in parameters BB"),
77
+                        row.names=c(
78
+                          "SNPQC",
79
+                          "cAA", "cAB", "cBB",
80
+                          "sAA", "sAB", "sBB",
81
+                          "nAA", "nAB", "nBB",
82
+                          "spAA", "spAB", "spBB"))
83
+    }else{
84
+      fd <- data.frame(SNPQC=x[["SNPQC"]],
85
+                       cAA=x[["params"]][["centers"]][,1],
86
+                       cAB=x[["params"]][["centers"]][,2],
87
+                       cBB=x[["params"]][["centers"]][,3],
88
+                       sAA=x[["params"]][["scales"]][,1],
89
+                       sAB=x[["params"]][["scales"]][,2],
90
+                       sBB=x[["params"]][["scales"]][,3],
91
+                       nAA=x[["params"]][["N"]][,1],
92
+                       nAB=x[["params"]][["N"]][,2],
93
+                       nBB=x[["params"]][["N"]][,3],
94
+                       row.names=rownames(x[["calls"]]))
95
+      fdv <- data.frame(labelDescription=c(
96
+                          "SNP Quality Score",
97
+                          "Center AA", "Center AB", "Center BB",
98
+                          "Scale AA", "Scale AB", "Scale BB",
99
+                          "N AA", "N AB", "N BB",
100
+                          "Shift in parameters AA",
101
+                          "Shift in parameters AB",
102
+                          "Shift in parameters BB"),
103
+                        row.names=c(
104
+                          "SNPQC",
105
+                          "cAA", "cAB", "cBB",
106
+                          "sAA", "sAB", "sBB",
107
+                          "nAA", "nAB", "nBB"))
108
+    }
109
+  }else{
110
+    if (recall){
111
+      fd <- data.frame(SNPQC=x[["SNPQC"]],
112
+                       spAA=x[["DD"]][,1],
113
+                       spAB=x[["DD"]][,2],
114
+                       spBB=x[["DD"]][,3],
115
+                       row.names=rownames(x[["calls"]]))
116
+      fdv <- data.frame(labelDescription=c("SNP Quality Score",
117
+                          "Shift in parameters AA",
118
+                          "Shift in parameters AB",
119
+                          "Shift in parameters BB"),
120
+                        row.names=c("SNPQC", "spAA", "spAB", "spBB"))
121
+    }else{
122
+      fd <- data.frame(SNPQC=x[["SNPQC"]],
123
+                       row.names=rownames(x[["calls"]]))
124
+      fdv <- data.frame(labelDescription=c("SNP Quality Score"),
125
+                        row.names=c("SNPQC"))
126
+    }
127
+  }
128
+  new("SnpSet",
129
+      assayData=assayDataNew("lockedEnvironment",
130
+        call=x[["calls"]], callProbability=x[["confs"]]),
131
+      phenoData=new("AnnotatedDataFrame",
132
+        data=pd, varMetadata=pdv),
133
+      featureData=new("AnnotatedDataFrame",
134
+        data=fd, varMetadata=fdv),
135
+      annotation=x[["pkgname"]])
136
+}
... ...
@@ -1,7 +1,7 @@
1 1
 #####################################
2 2
 ### FOR CRLMM
3 3
 #####################################
4
-- Decide on output format (BC vote: eSet-like)
4
+- Fix downstream vignette
5 5
 - Add RS ids to annotation packages
6 6
 - Allele plots
7 7
 - M v S plots
... ...
@@ -14,7 +14,7 @@
14 14
 
15 15
 \begin{document}
16 16
 \title{Genotyping with the \Rpackage{crlmm} Package}
17
-\date{January, 2009}
17
+\date{March, 2009}
18 18
 \author{Benilton Carvalho}
19 19
 \maketitle
20 20
 
... ...
@@ -56,20 +56,21 @@ celFiles <- list.celfiles(path, full.names=TRUE)
56 56
 system.time(crlmmResult <- crlmm(celFiles, verbose=FALSE))
57 57
 @ 
58 58
 
59
-The \Robject{crlmmResult} is a list with the following components:
59
+The \Robject{crlmmResult} is a \Rclass{SnpSet} (see Biobase) object.
60 60
 \begin{itemize}
61 61
 \item \Robject{calls}: genotype calls (1 - AA; 2 - AB; 3 - BB);
62 62
 \item \Robject{confs}: confidence scores, which can be translated to probabilities by using:
63 63
   \[ 1-2^-(\mbox{confs}/1000), \] although we prefer this
64 64
   representation as it saves a significant amount of memory;
65 65
 \item \Robject{SNPQC}: SNP quality score;
66
-\item \Robject{batchQC}: Batch quality score;
66
+%%\item \Robject{batchQC}: Batch quality score;
67 67
 \item \Robject{SNR}: Signal-to-noise ratio.
68 68
 \end{itemize}
69 69
 
70 70
 <<out>>=
71
-crlmmResult[["calls"]][1:10,]
72
-crlmmResult[["confs"]][1:10,]
71
+calls(crlmmResult)[1:10,]
72
+confs(crlmmResult)[1:10,]
73
+crlmmResult[["SNR"]]
73 74
 @ 
74 75
 
75 76
 \section{Details}
... ...
@@ -10,8 +10,10 @@ Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays.
10 10
 \details{
11 11
 Index:
12 12
 \preformatted{
13
-crlmm                   Genotype SNP 5.0 or 6.0 samples.
14 13
 crlmm-package           New implementation of the CRLMM Algorithm.
14
+crlmm                   Genotype SNP 5.0 or 6.0 samples.
15
+calls                   Accessor for genotype calls.
16
+confs                   Accessor for confidences.
15 17
 }
16 18
 The 'crlmm' package reimplements the CRLMM algorithm present in the
17 19
 'oligo' package. This implementation primes for efficient genotyping of
... ...
@@ -1,6 +1,10 @@
1 1
 \name{crlmm}
2 2
 \alias{crlmm}
3
-%- Also NEED an '\alias' for EACH other topic documented here.
3
+\alias{calls}
4
+\alias{calls,SnpSet-method}
5
+\alias{confs}
6
+\alias{confs,SnpSet-method}
7
+
4 8
 \title{Genotype oligonucleotide arrays with CRLMM}
5 9
 \description{
6 10
   This is a faster and more efficient implementation of the CRLMM
... ...
@@ -8,16 +12,18 @@
8 12
   be soon extended to other platforms).
9 13
 }
10 14
 \usage{
11
-
12 15
 crlmm(filenames, row.names=TRUE, col.names=TRUE,
13 16
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5,
14 17
       gender=NULL, save.it=FALSE, load.it=FALSE,
15 18
       intensityFile, mixtureSampleSize=10^5,
16 19
       eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10,
17 20
       recallRegMin=1000, returnParams=FALSE, badSNP=0.7)
21
+calls(x)
22
+confs(x)
18 23
 }
19
-%- maybe also 'usage' for other objects documented here.
24
+
20 25
 \arguments{
26
+  \item{x}{'SnpSet' object. Usually the result of crlmm.}
21 27
   \item{filenames}{'character' vector with CEL files to be genotyped.}
22 28
   \item{row.names}{'logical'. Use rownames - SNP names?}
23 29
   \item{col.names}{'logical'. Use colnames - Sample names?}
... ...
@@ -43,6 +49,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE,
43 49
   \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
44 50
 }
45 51
 \value{
52
+  A \code{SnpSet} object.
46 53
   \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)}
47 54
   \item{confs}{Confidence scores 'round(-1000*log2(1-p))'}
48 55
   \item{SNPQC}{SNP Quality Scores}
... ...
@@ -67,8 +74,8 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
67 74
   ## very useful when genotyping samples not in the working directory
68 75
   cels <- list.celfiles(path, full.names=TRUE)
69 76
   crlmmOutput <- crlmm(cels)
70
-  crlmmOutput[["calls"]][1:10, 1:2]
71
-  crlmmOutput[["confs"]][1:10, 1:2]
77
+  (calls(crlmmOutput)[1:10, 1:2])
78
+  (confs(crlmmOutput)[1:10, 1:2])
72 79
 }
73 80
 }
74 81
 \keyword{classif}
75 82
deleted file mode 100644
... ...
@@ -1,58 +0,0 @@
1
-\name{crlmmSet-class}
2
-\Rdversion{1.1}
3
-\docType{class}
4
-\alias{crlmmSet-class}
5
-\alias{calls}
6
-\alias{calls,crlmmSet-method}
7
-\alias{confs}
8
-\alias{confs,crlmmSet-method}
9
-\alias{list2crlmmSet}
10
-\alias{list2crlmmSet,list-method}
11
-
12
-\title{Class "crlmmSet"}
13
-\description{An eSet extension to store genotype results.}
14
-\section{Objects from the Class}{
15
-  Objects can be created by calls of the form \code{new("crlmmSet",
16
-    assayData, phenoData, featureData, experimentData, annotation,
17
-    ...)}.
18
-}
19
-\section{Slots}{
20
-	 \describe{
21
-    \item{\code{assayData}:}{Object of class \code{"AssayData"}. For an
22
-      object to be valid, the assayData slot must have two matrices
23
-      named 'calls' and 'confs'.}
24
-    \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"}}
25
-    \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"}}
26
-    \item{\code{experimentData}:}{Object of class \code{"MIAME"}}
27
-    \item{\code{annotation}:}{Object of class \code{"character"}}
28
-    \item{\code{.__classVersion__}:}{Object of class \code{"Versions"}}
29
-  }
30
-}
31
-\section{Extends}{
32
-Class \code{"\link[Biobase]{eSet-class}"}, directly.
33
-Class \code{"\link[Biobase]{VersionedBiobase-class}"}, by class "eSet", distance 2.
34
-Class \code{"\link[Biobase]{Versioned-class}"}, by class "eSet", distance 3.
35
-}
36
-\section{Methods}{
37
-  \describe{
38
-    \item{calls}{\code{signature(x = "crlmmSet")}: extractor for
39
-      genotype calls}
40
-    \item{confs}{\code{signature(x = "crlmmSet")}: extractor for confidences}
41
-	 }
42
-}
43
-\examples{
44
-if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){
45
-  path <- system.file("celFiles", package="hapmapsnp5")
46
-
47
-  ## the filenames with full path...
48
-  ## very useful when genotyping samples not in the working directory
49
-  cels <- list.celfiles(path, full.names=TRUE)
50
-  crlmmOutput <- crlmm(cels)
51
-  crlmmSetObject <- list2crlmmSet(crlmmOutput)
52
-  crlmmSetObject
53
-  calls(crlmmSetObject)[1:10, 1:2]
54
-  confs(crlmmSetObject)[1:10, 1:2]
55
-}
56
-}
57
-\keyword{classes}
58
-\keyword{methods}