git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@38275 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -49,9 +49,12 @@ BioC data packages |
49 | 49 |
2009-03-25 Rob Scharpf - committed version 1.0.61 |
50 | 50 |
* update to copynumber vignette |
51 | 51 |
|
52 |
+2009-03-27 B Carvalho - committed version 1.0.62 |
|
52 | 53 |
|
54 |
+* crlmm() returns a SnpSet object (from Biobase) |
|
53 | 55 |
|
56 |
+* fixed standard vignette to accommodate this change |
|
54 | 57 |
|
58 |
+* redefined calls/confs to accommodate this change |
|
55 | 59 |
|
56 |
- |
|
57 |
- |
|
60 |
+* fixed documentation to acommodate this change |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: crlmm |
2 | 2 |
Type: Package |
3 | 3 |
Title: Genotype Calling (CRLMM) and Copy Number Analysis tool for SNP 5.0 and 6.0 arrays. |
4 |
-Version: 1.0.61 |
|
4 |
+Version: 1.0.62 |
|
5 | 5 |
Date: 2008-12-28 |
6 | 6 |
Author: Rafael A Irizarry, Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu> |
7 | 7 |
Maintainer: Benilton S Carvalho <bcarvalh@jhsph.edu>, Robert Scharpf <rscharpf@jhsph.edu> |
... | ... |
@@ -10,7 +10,7 @@ License: Artistic-2.0 |
10 | 10 |
Imports: affyio, preprocessCore, utils, stats, genefilter, splines, Biobase, mvtnorm |
11 | 11 |
Suggests: hapmapsnp5, genomewidesnp5Crlmm, genomewidesnp6Crlmm (>= 1.0.1), methods, GGdata, snpMatrix |
12 | 12 |
Collate: AllClasses.R |
13 |
- crlmm-methods.R |
|
13 |
+ methods-SnpSet.R |
|
14 | 14 |
cnrma-functions.R |
15 | 15 |
crlmm-functions.R |
16 | 16 |
snprma-functions.R |
... | ... |
@@ -1,7 +1,4 @@ |
1 | 1 |
useDynLib("crlmm", .registration=TRUE) |
2 |
-export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates") |
|
3 |
-exportMethods("list2crlmmSet", "calls", "confs") |
|
4 |
-exportClasses("crlmmSet") |
|
5 | 2 |
import(Biobase) |
6 | 3 |
importFrom(affyio, read.celfile.header, read.celfile) |
7 | 4 |
importFrom(preprocessCore, normalize.quantiles.use.target) |
... | ... |
@@ -10,3 +7,5 @@ importFrom(stats, coef, cov, dnorm, kmeans, lm, mad, median, quantile, sd) |
10 | 7 |
importFrom(genefilter, rowSds) |
11 | 8 |
importFrom(mvtnorm, dmvnorm) |
12 | 9 |
|
10 |
+export("crlmm", "list.celfiles", "computeCopynumber", "cnrma", "celDates") |
|
11 |
+exportMethods("calls", "confs") |
... | ... |
@@ -189,7 +189,8 @@ instantiateObjects <- function(calls, NP, plate, envir, chrom, A, B, |
189 | 189 |
gender, SNRmin=5, SNR){ |
190 | 190 |
envir[["chrom"]] <- chrom |
191 | 191 |
CHR_INDEX <- paste(chrom, "index", sep="") |
192 |
- data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
192 |
+ data(list=CHR_INDEX, package="genomewidesnp6Crlmm") |
|
193 |
+ ## Rob, where does 'index[[1]]' come from? |
|
193 | 194 |
A <- A[index[[1]], SNR > SNRmin] |
194 | 195 |
B <- B[index[[1]], SNR > SNRmin] |
195 | 196 |
calls <- calls[index[[1]], SNR > SNRmin] |
... | ... |
@@ -43,7 +43,7 @@ crlmm <- function(filenames, row.names=TRUE, col.names=TRUE, |
43 | 43 |
|
44 | 44 |
res2[["SNR"]] <- res[["SNR"]] |
45 | 45 |
res2[["SKW"]] <- res[["SKW"]] |
46 |
- return(res2) |
|
46 |
+ return(list2SnpSet(res2, returnParams=returnParams)) |
|
47 | 47 |
} |
48 | 48 |
|
49 | 49 |
crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
... | ... |
@@ -122,9 +122,8 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
122 | 122 |
newparams[["N"]][, 3]) > recallMin & |
123 | 123 |
!apply(regionInfo, 1, any)), |
124 | 124 |
autosomeIndex) |
125 |
- |
|
126 | 125 |
if(length(Index) < recallRegMin){ |
127 |
- warning("Recallibration not possible.") |
|
126 |
+ warning("Recalibration not possible.") |
|
128 | 127 |
newparams <- params |
129 | 128 |
dev <- vector("numeric", nrow(newparams[["centers"]])) |
130 | 129 |
SS <- matrix(Inf, 3, 3) |
... | ... |
@@ -221,9 +220,9 @@ crlmmGT <- function(A, B, SNR, mixtureParams, cdfName, row.names=NULL, |
221 | 220 |
|
222 | 221 |
if(verbose) message("Done.") |
223 | 222 |
if (returnParams){ |
224 |
- return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS)) |
|
223 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, params=params, DD=DD, covDD=SS, gender=gender, pkgname=pkgname)) |
|
225 | 224 |
}else{ |
226 |
- return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS)) |
|
225 |
+ return(list(calls=A, confs=B, SNPQC=dev, batchQC=batchQC, DD=DD, covDD=SS, gender=gender, pkgname=pkgname)) |
|
227 | 226 |
} |
228 | 227 |
} |
229 | 228 |
|
230 | 229 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-## Methods for crlmm |
|
2 |
- |
|
3 |
-setGeneric("calls", function(x) standardGeneric("calls")) |
|
4 |
-setMethod("calls", "crlmmSet", function(x) assayData(x)$calls) |
|
5 |
- |
|
6 |
-setGeneric("confs", function(x) standardGeneric("confs")) |
|
7 |
-setMethod("confs", "crlmmSet", function(x) assayData(x)$confs) |
|
8 |
- |
|
9 |
-setGeneric("list2crlmmSet", function(x) standardGeneric("list2crlmmSet")) |
|
10 |
-setMethod("list2crlmmSet", "list", |
|
11 |
- function(x){ |
|
12 |
- pd <- data.frame(SNR=x[["SNR"]], |
|
13 |
- row.names=colnames(x[["calls"]])) |
|
14 |
- pdv <- data.frame(labelDescription=c("Signal-to-noise Ratio"), |
|
15 |
- row.names=c("SNR")) |
|
16 |
- fd <- data.frame(SNPQC=x[["SNPQC"]], |
|
17 |
- row.names=rownames(x[["calls"]])) |
|
18 |
- fdv <- data.frame(labelDescription=c("SNP Quality Score"), |
|
19 |
- row.names=c("SNPQC")) |
|
20 |
- new("crlmmSet", |
|
21 |
- assayData=assayDataNew("lockedEnvironment", |
|
22 |
- calls=x[["calls"]], confs=x[["confs"]]), |
|
23 |
- phenoData=new("AnnotatedDataFrame", |
|
24 |
- data=pd, varMetadata=pdv), |
|
25 |
- featureData=new("AnnotatedDataFrame", |
|
26 |
- data=fd, varMetadata=fdv)) |
|
27 |
- }) |
28 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,7 @@ |
1 |
+## Methods for crlmm |
|
2 |
+ |
|
3 |
+setGeneric("calls", function(x) standardGeneric("calls")) |
|
4 |
+setMethod("calls", "SnpSet", function(x) assayData(x)$call) |
|
5 |
+ |
|
6 |
+setGeneric("confs", function(x) standardGeneric("confs")) |
|
7 |
+setMethod("confs", "SnpSet", function(x) assayData(x)$callProbability) |
... | ... |
@@ -42,3 +42,95 @@ getVarInEnv <- function(dataset, environ=.crlmmPkgEnv){ |
42 | 42 |
stop("Variable ", dataset, " not found in .crlmmPkgEnv") |
43 | 43 |
environ[[dataset]] |
44 | 44 |
} |
45 |
+ |
|
46 |
+list2SnpSet <- function(x, returnParams=FALSE){ |
|
47 |
+ pd <- data.frame(SNR=x[["SNR"]], gender=x[["gender"]], |
|
48 |
+ row.names=colnames(x[["calls"]])) |
|
49 |
+ pdv <- data.frame(labelDescription=c("Signal-to-noise Ratio", |
|
50 |
+ "Gender: Male (1) and Female (2)"), |
|
51 |
+ row.names=c("SNR", "gender")) |
|
52 |
+ recall <- length(x[["DD"]]) > 1 |
|
53 |
+ if (returnParams){ |
|
54 |
+ if (recall){ |
|
55 |
+ fd <- data.frame(SNPQC=x[["SNPQC"]], |
|
56 |
+ cAA=x[["params"]][["centers"]][,1], |
|
57 |
+ cAB=x[["params"]][["centers"]][,2], |
|
58 |
+ cBB=x[["params"]][["centers"]][,3], |
|
59 |
+ sAA=x[["params"]][["scales"]][,1], |
|
60 |
+ sAB=x[["params"]][["scales"]][,2], |
|
61 |
+ sBB=x[["params"]][["scales"]][,3], |
|
62 |
+ nAA=x[["params"]][["N"]][,1], |
|
63 |
+ nAB=x[["params"]][["N"]][,2], |
|
64 |
+ nBB=x[["params"]][["N"]][,3], |
|
65 |
+ spAA=x[["DD"]][,1], |
|
66 |
+ spAB=x[["DD"]][,2], |
|
67 |
+ spBB=x[["DD"]][,3], |
|
68 |
+ row.names=rownames(x[["calls"]])) |
|
69 |
+ fdv <- data.frame(labelDescription=c( |
|
70 |
+ "SNP Quality Score", |
|
71 |
+ "Center AA", "Center AB", "Center BB", |
|
72 |
+ "Scale AA", "Scale AB", "Scale BB", |
|
73 |
+ "N AA", "N AB", "N BB", |
|
74 |
+ "Shift in parameters AA", |
|
75 |
+ "Shift in parameters AB", |
|
76 |
+ "Shift in parameters BB"), |
|
77 |
+ row.names=c( |
|
78 |
+ "SNPQC", |
|
79 |
+ "cAA", "cAB", "cBB", |
|
80 |
+ "sAA", "sAB", "sBB", |
|
81 |
+ "nAA", "nAB", "nBB", |
|
82 |
+ "spAA", "spAB", "spBB")) |
|
83 |
+ }else{ |
|
84 |
+ fd <- data.frame(SNPQC=x[["SNPQC"]], |
|
85 |
+ cAA=x[["params"]][["centers"]][,1], |
|
86 |
+ cAB=x[["params"]][["centers"]][,2], |
|
87 |
+ cBB=x[["params"]][["centers"]][,3], |
|
88 |
+ sAA=x[["params"]][["scales"]][,1], |
|
89 |
+ sAB=x[["params"]][["scales"]][,2], |
|
90 |
+ sBB=x[["params"]][["scales"]][,3], |
|
91 |
+ nAA=x[["params"]][["N"]][,1], |
|
92 |
+ nAB=x[["params"]][["N"]][,2], |
|
93 |
+ nBB=x[["params"]][["N"]][,3], |
|
94 |
+ row.names=rownames(x[["calls"]])) |
|
95 |
+ fdv <- data.frame(labelDescription=c( |
|
96 |
+ "SNP Quality Score", |
|
97 |
+ "Center AA", "Center AB", "Center BB", |
|
98 |
+ "Scale AA", "Scale AB", "Scale BB", |
|
99 |
+ "N AA", "N AB", "N BB", |
|
100 |
+ "Shift in parameters AA", |
|
101 |
+ "Shift in parameters AB", |
|
102 |
+ "Shift in parameters BB"), |
|
103 |
+ row.names=c( |
|
104 |
+ "SNPQC", |
|
105 |
+ "cAA", "cAB", "cBB", |
|
106 |
+ "sAA", "sAB", "sBB", |
|
107 |
+ "nAA", "nAB", "nBB")) |
|
108 |
+ } |
|
109 |
+ }else{ |
|
110 |
+ if (recall){ |
|
111 |
+ fd <- data.frame(SNPQC=x[["SNPQC"]], |
|
112 |
+ spAA=x[["DD"]][,1], |
|
113 |
+ spAB=x[["DD"]][,2], |
|
114 |
+ spBB=x[["DD"]][,3], |
|
115 |
+ row.names=rownames(x[["calls"]])) |
|
116 |
+ fdv <- data.frame(labelDescription=c("SNP Quality Score", |
|
117 |
+ "Shift in parameters AA", |
|
118 |
+ "Shift in parameters AB", |
|
119 |
+ "Shift in parameters BB"), |
|
120 |
+ row.names=c("SNPQC", "spAA", "spAB", "spBB")) |
|
121 |
+ }else{ |
|
122 |
+ fd <- data.frame(SNPQC=x[["SNPQC"]], |
|
123 |
+ row.names=rownames(x[["calls"]])) |
|
124 |
+ fdv <- data.frame(labelDescription=c("SNP Quality Score"), |
|
125 |
+ row.names=c("SNPQC")) |
|
126 |
+ } |
|
127 |
+ } |
|
128 |
+ new("SnpSet", |
|
129 |
+ assayData=assayDataNew("lockedEnvironment", |
|
130 |
+ call=x[["calls"]], callProbability=x[["confs"]]), |
|
131 |
+ phenoData=new("AnnotatedDataFrame", |
|
132 |
+ data=pd, varMetadata=pdv), |
|
133 |
+ featureData=new("AnnotatedDataFrame", |
|
134 |
+ data=fd, varMetadata=fdv), |
|
135 |
+ annotation=x[["pkgname"]]) |
|
136 |
+} |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
|
15 | 15 |
\begin{document} |
16 | 16 |
\title{Genotyping with the \Rpackage{crlmm} Package} |
17 |
-\date{January, 2009} |
|
17 |
+\date{March, 2009} |
|
18 | 18 |
\author{Benilton Carvalho} |
19 | 19 |
\maketitle |
20 | 20 |
|
... | ... |
@@ -56,20 +56,21 @@ celFiles <- list.celfiles(path, full.names=TRUE) |
56 | 56 |
system.time(crlmmResult <- crlmm(celFiles, verbose=FALSE)) |
57 | 57 |
@ |
58 | 58 |
|
59 |
-The \Robject{crlmmResult} is a list with the following components: |
|
59 |
+The \Robject{crlmmResult} is a \Rclass{SnpSet} (see Biobase) object. |
|
60 | 60 |
\begin{itemize} |
61 | 61 |
\item \Robject{calls}: genotype calls (1 - AA; 2 - AB; 3 - BB); |
62 | 62 |
\item \Robject{confs}: confidence scores, which can be translated to probabilities by using: |
63 | 63 |
\[ 1-2^-(\mbox{confs}/1000), \] although we prefer this |
64 | 64 |
representation as it saves a significant amount of memory; |
65 | 65 |
\item \Robject{SNPQC}: SNP quality score; |
66 |
-\item \Robject{batchQC}: Batch quality score; |
|
66 |
+%%\item \Robject{batchQC}: Batch quality score; |
|
67 | 67 |
\item \Robject{SNR}: Signal-to-noise ratio. |
68 | 68 |
\end{itemize} |
69 | 69 |
|
70 | 70 |
<<out>>= |
71 |
-crlmmResult[["calls"]][1:10,] |
|
72 |
-crlmmResult[["confs"]][1:10,] |
|
71 |
+calls(crlmmResult)[1:10,] |
|
72 |
+confs(crlmmResult)[1:10,] |
|
73 |
+crlmmResult[["SNR"]] |
|
73 | 74 |
@ |
74 | 75 |
|
75 | 76 |
\section{Details} |
... | ... |
@@ -10,8 +10,10 @@ Faster implementation of CRLMM specific to SNP 5.0 and 6.0 arrays. |
10 | 10 |
\details{ |
11 | 11 |
Index: |
12 | 12 |
\preformatted{ |
13 |
-crlmm Genotype SNP 5.0 or 6.0 samples. |
|
14 | 13 |
crlmm-package New implementation of the CRLMM Algorithm. |
14 |
+crlmm Genotype SNP 5.0 or 6.0 samples. |
|
15 |
+calls Accessor for genotype calls. |
|
16 |
+confs Accessor for confidences. |
|
15 | 17 |
} |
16 | 18 |
The 'crlmm' package reimplements the CRLMM algorithm present in the |
17 | 19 |
'oligo' package. This implementation primes for efficient genotyping of |
... | ... |
@@ -1,6 +1,10 @@ |
1 | 1 |
\name{crlmm} |
2 | 2 |
\alias{crlmm} |
3 |
-%- Also NEED an '\alias' for EACH other topic documented here. |
|
3 |
+\alias{calls} |
|
4 |
+\alias{calls,SnpSet-method} |
|
5 |
+\alias{confs} |
|
6 |
+\alias{confs,SnpSet-method} |
|
7 |
+ |
|
4 | 8 |
\title{Genotype oligonucleotide arrays with CRLMM} |
5 | 9 |
\description{ |
6 | 10 |
This is a faster and more efficient implementation of the CRLMM |
... | ... |
@@ -8,16 +12,18 @@ |
8 | 12 |
be soon extended to other platforms). |
9 | 13 |
} |
10 | 14 |
\usage{ |
11 |
- |
|
12 | 15 |
crlmm(filenames, row.names=TRUE, col.names=TRUE, |
13 | 16 |
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, |
14 | 17 |
gender=NULL, save.it=FALSE, load.it=FALSE, |
15 | 18 |
intensityFile, mixtureSampleSize=10^5, |
16 | 19 |
eps=0.1, verbose=TRUE, cdfName, sns, recallMin=10, |
17 | 20 |
recallRegMin=1000, returnParams=FALSE, badSNP=0.7) |
21 |
+calls(x) |
|
22 |
+confs(x) |
|
18 | 23 |
} |
19 |
-%- maybe also 'usage' for other objects documented here. |
|
24 |
+ |
|
20 | 25 |
\arguments{ |
26 |
+ \item{x}{'SnpSet' object. Usually the result of crlmm.} |
|
21 | 27 |
\item{filenames}{'character' vector with CEL files to be genotyped.} |
22 | 28 |
\item{row.names}{'logical'. Use rownames - SNP names?} |
23 | 29 |
\item{col.names}{'logical'. Use colnames - Sample names?} |
... | ... |
@@ -43,6 +49,7 @@ crlmm(filenames, row.names=TRUE, col.names=TRUE, |
43 | 49 |
\item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)} |
44 | 50 |
} |
45 | 51 |
\value{ |
52 |
+ A \code{SnpSet} object. |
|
46 | 53 |
\item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)} |
47 | 54 |
\item{confs}{Confidence scores 'round(-1000*log2(1-p))'} |
48 | 55 |
\item{SNPQC}{SNP Quality Scores} |
... | ... |
@@ -67,8 +74,8 @@ if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){ |
67 | 74 |
## very useful when genotyping samples not in the working directory |
68 | 75 |
cels <- list.celfiles(path, full.names=TRUE) |
69 | 76 |
crlmmOutput <- crlmm(cels) |
70 |
- crlmmOutput[["calls"]][1:10, 1:2] |
|
71 |
- crlmmOutput[["confs"]][1:10, 1:2] |
|
77 |
+ (calls(crlmmOutput)[1:10, 1:2]) |
|
78 |
+ (confs(crlmmOutput)[1:10, 1:2]) |
|
72 | 79 |
} |
73 | 80 |
} |
74 | 81 |
\keyword{classif} |
75 | 82 |
deleted file mode 100644 |
... | ... |
@@ -1,58 +0,0 @@ |
1 |
-\name{crlmmSet-class} |
|
2 |
-\Rdversion{1.1} |
|
3 |
-\docType{class} |
|
4 |
-\alias{crlmmSet-class} |
|
5 |
-\alias{calls} |
|
6 |
-\alias{calls,crlmmSet-method} |
|
7 |
-\alias{confs} |
|
8 |
-\alias{confs,crlmmSet-method} |
|
9 |
-\alias{list2crlmmSet} |
|
10 |
-\alias{list2crlmmSet,list-method} |
|
11 |
- |
|
12 |
-\title{Class "crlmmSet"} |
|
13 |
-\description{An eSet extension to store genotype results.} |
|
14 |
-\section{Objects from the Class}{ |
|
15 |
- Objects can be created by calls of the form \code{new("crlmmSet", |
|
16 |
- assayData, phenoData, featureData, experimentData, annotation, |
|
17 |
- ...)}. |
|
18 |
-} |
|
19 |
-\section{Slots}{ |
|
20 |
- \describe{ |
|
21 |
- \item{\code{assayData}:}{Object of class \code{"AssayData"}. For an |
|
22 |
- object to be valid, the assayData slot must have two matrices |
|
23 |
- named 'calls' and 'confs'.} |
|
24 |
- \item{\code{phenoData}:}{Object of class \code{"AnnotatedDataFrame"}} |
|
25 |
- \item{\code{featureData}:}{Object of class \code{"AnnotatedDataFrame"}} |
|
26 |
- \item{\code{experimentData}:}{Object of class \code{"MIAME"}} |
|
27 |
- \item{\code{annotation}:}{Object of class \code{"character"}} |
|
28 |
- \item{\code{.__classVersion__}:}{Object of class \code{"Versions"}} |
|
29 |
- } |
|
30 |
-} |
|
31 |
-\section{Extends}{ |
|
32 |
-Class \code{"\link[Biobase]{eSet-class}"}, directly. |
|
33 |
-Class \code{"\link[Biobase]{VersionedBiobase-class}"}, by class "eSet", distance 2. |
|
34 |
-Class \code{"\link[Biobase]{Versioned-class}"}, by class "eSet", distance 3. |
|
35 |
-} |
|
36 |
-\section{Methods}{ |
|
37 |
- \describe{ |
|
38 |
- \item{calls}{\code{signature(x = "crlmmSet")}: extractor for |
|
39 |
- genotype calls} |
|
40 |
- \item{confs}{\code{signature(x = "crlmmSet")}: extractor for confidences} |
|
41 |
- } |
|
42 |
-} |
|
43 |
-\examples{ |
|
44 |
-if (require(genomewidesnp5Crlmm) & require(hapmapsnp5)){ |
|
45 |
- path <- system.file("celFiles", package="hapmapsnp5") |
|
46 |
- |
|
47 |
- ## the filenames with full path... |
|
48 |
- ## very useful when genotyping samples not in the working directory |
|
49 |
- cels <- list.celfiles(path, full.names=TRUE) |
|
50 |
- crlmmOutput <- crlmm(cels) |
|
51 |
- crlmmSetObject <- list2crlmmSet(crlmmOutput) |
|
52 |
- crlmmSetObject |
|
53 |
- calls(crlmmSetObject)[1:10, 1:2] |
|
54 |
- confs(crlmmSetObject)[1:10, 1:2] |
|
55 |
-} |
|
56 |
-} |
|
57 |
-\keyword{classes} |
|
58 |
-\keyword{methods} |