Exclude 232 SNPs returned by getVarInEnv("gns") that are not annotated
to a chromosome.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50804 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -46,8 +46,7 @@ getFeatureData <- function(cdfName, copynumber=FALSE){ |
46 | 46 |
M[index, "chromosome"] <- chromosome2integer(cnProbes[, grep("chr", colnames(cnProbes))]) |
47 | 47 |
M[index, "isSnp"] <- 0L |
48 | 48 |
} |
49 |
- ##A few of the snpProbes do not match -- I think it is chromosome Y. |
|
50 |
- M[is.na(M[, "isSnp"]), "isSnp"] <- 1L |
|
49 |
+ M <- M[!is.na(M[, "chromosome"]), ] |
|
51 | 50 |
M <- data.frame(M) |
52 | 51 |
return(new("AnnotatedDataFrame", data=M)) |
53 | 52 |
} |
... | ... |
@@ -141,7 +141,7 @@ readIdatFiles = function(sampleSheet=NULL, |
141 | 141 |
protocolData(RG)[["ScanDate"]] = dates$scan |
142 | 142 |
} |
143 | 143 |
storageMode(RG) = "lockedEnvironment" |
144 |
- is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
144 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
145 | 145 |
if(is.lds) { |
146 | 146 |
close(RG@assayData$R) |
147 | 147 |
close(RG@assayData$G) |
... | ... |
@@ -467,7 +467,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
467 | 467 |
zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"), |
468 | 468 |
annotation=chipType, phenoData=RG@phenoData, |
469 | 469 |
protocolData=RG@protocolData, storage.mode="environment") |
470 |
- featureNames(XY) = ids |
|
470 |
+ featureNames(XY) = ids |
|
471 | 471 |
sampleNames(XY) = sampleNames(RG) |
472 | 472 |
gc() |
473 | 473 |
# Need to initialize - matrices filled with NAs to begin with |
... | ... |
@@ -476,7 +476,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
476 | 476 |
XY@assayData$zero[1:nsnps,] = 0 |
477 | 477 |
|
478 | 478 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
479 |
- |
|
479 |
+ |
|
480 | 480 |
if(is.lds) { |
481 | 481 |
open(RG@assayData$G) |
482 | 482 |
open(RG@assayData$R) |
... | ... |
@@ -513,7 +513,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) { |
513 | 513 |
if(is.lds) { |
514 | 514 |
close(RG@assayData$G) |
515 | 515 |
close(RG@assayData$R) |
516 |
- close(RG@assayData$zero) |
|
516 |
+ close(RG@assayData$zero) |
|
517 | 517 |
close(XY@assayData$X) |
518 | 518 |
close(XY@assayData$Y) |
519 | 519 |
close(XY@assayData$zero) |
... | ... |
@@ -544,7 +544,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) { |
544 | 544 |
if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3) |
545 | 545 |
} |
546 | 546 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
547 |
- |
|
547 |
+ |
|
548 | 548 |
if(is.lds) { |
549 | 549 |
open(XY@assayData$X) |
550 | 550 |
open(XY@assayData$Y) |
... | ... |
@@ -585,7 +585,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
585 | 585 |
cdfName, |
586 | 586 |
sns, |
587 | 587 |
stripNorm=TRUE, |
588 |
- useTarget=TRUE) { |
|
588 |
+ useTarget=TRUE) { |
|
589 | 589 |
# save.it=FALSE, |
590 | 590 |
# snpFile, |
591 | 591 |
# cnFile) { |
... | ... |
@@ -614,14 +614,14 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
614 | 614 |
theKnots = getVarInEnv("theKnots") |
615 | 615 |
narrays = ncol(XY) |
616 | 616 |
|
617 |
- is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
618 |
- |
|
617 |
+ is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
|
618 |
+ |
|
619 | 619 |
# if(save.it & !missing(cnFile)) { |
620 | 620 |
# separate out copy number probes |
621 | 621 |
npIndex = getVarInEnv("npProbesFid") |
622 | 622 |
nprobes = length(npIndex) |
623 | 623 |
if(length(nprobes)>0) { |
624 |
- if(is.lds) { |
|
624 |
+ if(is.lds) { |
|
625 | 625 |
open(XY@assayData$X) |
626 | 626 |
open(XY@assayData$Y) |
627 | 627 |
open(XY@assayData$zero) |
... | ... |
@@ -642,7 +642,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
642 | 642 |
# t0 <- proc.time()-t0 |
643 | 643 |
# if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".") |
644 | 644 |
rm(A, B, zero) |
645 |
- if(is.lds) { |
|
645 |
+ if(is.lds) { |
|
646 | 646 |
close(XY@assayData$X) |
647 | 647 |
close(XY@assayData$Y) |
648 | 648 |
close(XY@assayData$zero) |
... | ... |
@@ -686,7 +686,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
686 | 686 |
open(XY@assayData$Y) |
687 | 687 |
open(XY@assayData$zero) |
688 | 688 |
} |
689 |
- |
|
689 |
+ |
|
690 | 690 |
for(i in 1:narrays){ |
691 | 691 |
A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i]) |
692 | 692 |
B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i]) |
... | ... |
@@ -723,7 +723,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
723 | 723 |
if(is.lds) { |
724 | 724 |
close(XY@assayData$X) |
725 | 725 |
close(XY@assayData$Y) |
726 |
- close(XY@assayData$zero) |
|
726 |
+ close(XY@assayData$zero) |
|
727 | 727 |
close(A) |
728 | 728 |
close(B) |
729 | 729 |
close(zero) |
... | ... |
@@ -735,7 +735,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5, |
735 | 735 |
res = list(A=A, B=B, |
736 | 736 |
zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW, |
737 | 737 |
mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB) |
738 |
- |
|
738 |
+ |
|
739 | 739 |
# open(res[["A"]]) |
740 | 740 |
# open(res[["B"]]) |
741 | 741 |
# open(res[["zero"]]) |
... | ... |
@@ -796,7 +796,7 @@ crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
796 | 796 |
open(res[["SNR"]]) |
797 | 797 |
open(res[["mixtureParams"]]) |
798 | 798 |
} |
799 |
- |
|
799 |
+ |
|
800 | 800 |
# fD = featureData(XY) |
801 | 801 |
# phenD = XY@phenoData |
802 | 802 |
# protD = XY@protocolData |
... | ... |
@@ -859,7 +859,7 @@ crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE, |
859 | 859 |
verbose=verbose, |
860 | 860 |
returnParams=returnParams, |
861 | 861 |
badSNP=badSNP) |
862 |
- |
|
862 |
+ |
|
863 | 863 |
# res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]), |
864 | 864 |
# B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]), |
865 | 865 |
# SNR=res[["SNR"]], # callSet$SNR, # [j], |
... | ... |
@@ -923,7 +923,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
923 | 923 |
|
924 | 924 |
if(missing(cdfName)) stop("must specify cdfName") |
925 | 925 |
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames") |
926 |
- |
|
926 |
+ |
|
927 | 927 |
is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE) |
928 | 928 |
|
929 | 929 |
RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames, |
... | ... |
@@ -937,9 +937,9 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
937 | 937 |
delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero) |
938 | 938 |
} |
939 | 939 |
rm(RG); gc() |
940 |
- |
|
940 |
+ |
|
941 | 941 |
if (missing(sns)) { sns = sampleNames(XY) |
942 |
- } |
|
942 |
+ } |
|
943 | 943 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
944 | 944 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #, |
945 | 945 |
# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
... | ... |
@@ -952,7 +952,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
952 | 952 |
|
953 | 953 |
if(row.names) row.names=res$gns else row.names=NULL |
954 | 954 |
if(col.names) col.names=res$sns else col.names=NULL |
955 |
- |
|
955 |
+ |
|
956 | 956 |
FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT") |
957 | 957 |
## genotyping |
958 | 958 |
crlmmGTfxn = function(FUN,...){ |
... | ... |
@@ -977,7 +977,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL, |
977 | 977 |
verbose=verbose, |
978 | 978 |
returnParams=returnParams, |
979 | 979 |
badSNP=badSNP) |
980 |
- |
|
980 |
+ |
|
981 | 981 |
if(is.lds) { |
982 | 982 |
open(res[["SNR"]]); open(res[["SKW"]]) |
983 | 983 |
} |
... | ... |
@@ -1078,7 +1078,7 @@ construct.Illumina = function(sampleSheet=NULL, |
1078 | 1078 |
} |
1079 | 1079 |
|
1080 | 1080 |
narrays = length(arrayNames) |
1081 |
- |
|
1081 |
+ |
|
1082 | 1082 |
if(!missing(batch)) { |
1083 | 1083 |
stopifnot(length(batch) == narrays) |
1084 | 1084 |
} |
... | ... |
@@ -1132,7 +1132,7 @@ construct.Illumina = function(sampleSheet=NULL, |
1132 | 1132 |
protocolData(cnSet) = protocolData |
1133 | 1133 |
featureData(cnSet) = featureData |
1134 | 1134 |
featureNames(cnSet) = featureNames(featureData) |
1135 |
- pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) |
|
1135 |
+ pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) |
|
1136 | 1136 |
colnames(pd)=c("SKW", "SNR", "gender") |
1137 | 1137 |
phenoData(cnSet) = new("AnnotatedDataFrame", data=pd) |
1138 | 1138 |
return(cnSet) |
... | ... |
@@ -1175,7 +1175,7 @@ genotype.Illumina = function(sampleSheet=NULL, |
1175 | 1175 |
pkgname = getCrlmmAnnotationName(cdfName) |
1176 | 1176 |
callSet = construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames, |
1177 | 1177 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
1178 |
- highDensity=highDensity, sep=sep, fileExt=fileExt, |
|
1178 |
+ highDensity=highDensity, sep=sep, fileExt=fileExt, |
|
1179 | 1179 |
cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch, |
1180 | 1180 |
fns=fns, saveDate=saveDate) |
1181 | 1181 |
if(missing(sns)) sns = sampleNames(callSet) |
... | ... |
@@ -1183,14 +1183,13 @@ genotype.Illumina = function(sampleSheet=NULL, |
1183 | 1183 |
is.snp = isSnp(callSet) |
1184 | 1184 |
snp.index = which(is.snp) |
1185 | 1185 |
narrays = ncol(callSet) |
1186 |
- |
|
1187 | 1186 |
if(is.lds) { |
1188 | 1187 |
sampleBatches = splitIndicesByNode(seq(along=sampleNames(callSet))) |
1189 | 1188 |
|
1190 | 1189 |
mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double") |
1191 | 1190 |
SNR = initializeBigVector("crlmmSNR-", narrays, "double") |
1192 | 1191 |
SKW = initializeBigVector("crlmmSKW-", narrays, "double") |
1193 |
- |
|
1192 |
+ |
|
1194 | 1193 |
ocLapply(sampleBatches, processIDAT, sampleSheet=sampleSheet, arrayNames=arrayNames, |
1195 | 1194 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity, |
1196 | 1195 |
sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize, |
... | ... |
@@ -1213,12 +1212,12 @@ genotype.Illumina = function(sampleSheet=NULL, |
1213 | 1212 |
|
1214 | 1213 |
XY = RGtoXY(RG, chipType=cdfName) |
1215 | 1214 |
rm(RG); gc() |
1216 |
- |
|
1215 |
+ |
|
1217 | 1216 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1218 | 1217 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
1219 | 1218 |
rm(XY); gc() |
1220 | 1219 |
if(verbose) message("Finished preprocessing.") |
1221 |
- np.index = which(!is.snp) |
|
1220 |
+ np.index = which(!is.snp) |
|
1222 | 1221 |
A(callSet)[snp.index, ] = res[["A"]] |
1223 | 1222 |
B(callSet)[snp.index, ] = res[["B"]] |
1224 | 1223 |
if(length(np.index)>0) { |
... | ... |
@@ -1257,7 +1256,7 @@ genotype.Illumina = function(sampleSheet=NULL, |
1257 | 1256 |
tmpA = A(callSet)[snp.index,] |
1258 | 1257 |
tmpB = B(callSet)[snp.index,] |
1259 | 1258 |
} |
1260 |
- |
|
1259 |
+ |
|
1261 | 1260 |
tmp = crlmmGTfxn(FUN, |
1262 | 1261 |
A=tmpA, |
1263 | 1262 |
B=tmpB, |
... | ... |
@@ -1308,7 +1307,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1308 | 1307 |
highDensity=FALSE, |
1309 | 1308 |
sep="_", |
1310 | 1309 |
fileExt=list(green="Grn.idat", red="Red.idat"), |
1311 |
- saveDate=FALSE, |
|
1310 |
+ saveDate=FALSE, |
|
1312 | 1311 |
verbose=TRUE, |
1313 | 1312 |
mixtureSampleSize=10^5, |
1314 | 1313 |
fitMixture=TRUE, |
... | ... |
@@ -1319,7 +1318,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1319 | 1318 |
stripNorm=TRUE, |
1320 | 1319 |
useTarget=TRUE, |
1321 | 1320 |
A, B, SKW, SNR, mixtureParams, is.snp) { |
1322 |
- |
|
1321 |
+ |
|
1323 | 1322 |
if(length(path)>= length(sel)) path = path[sel] |
1324 | 1323 |
RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel], |
1325 | 1324 |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, |
... | ... |
@@ -1330,7 +1329,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1330 | 1329 |
delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero); rm(RG) |
1331 | 1330 |
gc() |
1332 | 1331 |
if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY) |
1333 |
- |
|
1332 |
+ |
|
1334 | 1333 |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose, |
1335 | 1334 |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) |
1336 | 1335 |
# save.it=save.it, snpFile=snpFile, cnFile=cnFile) |
... | ... |
@@ -1339,7 +1338,7 @@ processIDAT = function(sel, sampleSheet=NULL, |
1339 | 1338 |
gc() |
1340 | 1339 |
if(verbose) message("Finished preprocessing.") |
1341 | 1340 |
snp.index = which(is.snp) |
1342 |
- np.index = which(!is.snp) |
|
1341 |
+ np.index = which(!is.snp) |
|
1343 | 1342 |
|
1344 | 1343 |
open(res[["A"]]) |
1345 | 1344 |
open(res[["B"]]) |