Browse code

getFeatureData only returns features with chromosome annotation

Exclude 232 SNPs returned by getVarInEnv("gns") that are not annotated
to a chromosome.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@50804 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 11/11/2010 04:13:03
Showing 3 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 useDynLib("crlmm", .registration=TRUE)
2 2
 ## this is temporary
3
-## exportPattern("^[^\\.]")
3
+exportPattern("^[^\\.]")
4 4
 
5 5
 ##---------------------------------------------------------------------------
6 6
 ## Biobase
... ...
@@ -46,8 +46,7 @@ getFeatureData <- function(cdfName, copynumber=FALSE){
46 46
 		M[index, "chromosome"] <- chromosome2integer(cnProbes[, grep("chr", colnames(cnProbes))])
47 47
 		M[index, "isSnp"] <- 0L
48 48
 	}
49
-	##A few of the snpProbes do not match -- I think it is chromosome Y.
50
-	M[is.na(M[, "isSnp"]), "isSnp"] <- 1L
49
+	M <- M[!is.na(M[, "chromosome"]), ]
51 50
 	M <- data.frame(M)
52 51
 	return(new("AnnotatedDataFrame", data=M))
53 52
 }
... ...
@@ -141,7 +141,7 @@ readIdatFiles = function(sampleSheet=NULL,
141 141
 	       protocolData(RG)[["ScanDate"]] = dates$scan
142 142
        }
143 143
        storageMode(RG) = "lockedEnvironment"
144
-       is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)       
144
+       is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
145 145
        if(is.lds) {
146 146
          close(RG@assayData$R)
147 147
          close(RG@assayData$G)
... ...
@@ -467,7 +467,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
467 467
 	     zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
468 468
 	     annotation=chipType, phenoData=RG@phenoData,
469 469
 	     protocolData=RG@protocolData, storage.mode="environment")
470
-  featureNames(XY) = ids 
470
+  featureNames(XY) = ids
471 471
   sampleNames(XY) = sampleNames(RG)
472 472
   gc()
473 473
   # Need to initialize - matrices filled with NAs to begin with
... ...
@@ -476,7 +476,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
476 476
   XY@assayData$zero[1:nsnps,] = 0
477 477
 
478 478
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
479
-  
479
+
480 480
   if(is.lds) {
481 481
     open(RG@assayData$G)
482 482
     open(RG@assayData$R)
... ...
@@ -513,7 +513,7 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
513 513
   if(is.lds) {
514 514
     close(RG@assayData$G)
515 515
     close(RG@assayData$R)
516
-    close(RG@assayData$zero)    
516
+    close(RG@assayData$zero)
517 517
     close(XY@assayData$X)
518 518
     close(XY@assayData$Y)
519 519
     close(XY@assayData$zero)
... ...
@@ -544,7 +544,7 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
544 544
     if (getRversion() > '2.7.0') pb = txtProgressBar(min=0, max=max(stripnum), style=3)
545 545
   }
546 546
   is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
547
-  
547
+
548 548
   if(is.lds) {
549 549
     open(XY@assayData$X)
550 550
     open(XY@assayData$Y)
... ...
@@ -585,7 +585,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
585 585
 				cdfName,
586 586
 				sns,
587 587
 				stripNorm=TRUE,
588
-				useTarget=TRUE) { 
588
+				useTarget=TRUE) {
589 589
 #				save.it=FALSE,
590 590
 #				snpFile,
591 591
 #				cnFile) {
... ...
@@ -614,14 +614,14 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
614 614
   theKnots = getVarInEnv("theKnots")
615 615
   narrays = ncol(XY)
616 616
 
617
-  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)    
618
-  
617
+  is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
618
+
619 619
 #  if(save.it & !missing(cnFile)) {
620 620
     # separate out copy number probes
621 621
     npIndex = getVarInEnv("npProbesFid")
622 622
     nprobes = length(npIndex)
623 623
     if(length(nprobes)>0) {
624
-      if(is.lds) {    
624
+      if(is.lds) {
625 625
         open(XY@assayData$X)
626 626
         open(XY@assayData$Y)
627 627
         open(XY@assayData$zero)
... ...
@@ -642,7 +642,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
642 642
 #      t0 <- proc.time()-t0
643 643
 #      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
644 644
        rm(A, B, zero)
645
-      if(is.lds) {    
645
+      if(is.lds) {
646 646
         close(XY@assayData$X)
647 647
         close(XY@assayData$Y)
648 648
         close(XY@assayData$zero)
... ...
@@ -686,7 +686,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
686 686
     open(XY@assayData$Y)
687 687
     open(XY@assayData$zero)
688 688
   }
689
-  
689
+
690 690
   for(i in 1:narrays){
691 691
      A[,i] = as.integer(exprs(channel(XY, "X"))[snpIndex,i])
692 692
      B[,i] = as.integer(exprs(channel(XY, "Y"))[snpIndex,i])
... ...
@@ -723,7 +723,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
723 723
   if(is.lds) {
724 724
     close(XY@assayData$X)
725 725
     close(XY@assayData$Y)
726
-    close(XY@assayData$zero)    
726
+    close(XY@assayData$zero)
727 727
     close(A)
728 728
     close(B)
729 729
     close(zero)
... ...
@@ -735,7 +735,7 @@ preprocessInfinium2 = function(XY, mixtureSampleSize=10^5,
735 735
   res = list(A=A, B=B,
736 736
              zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
737 737
              mixtureParams=mixtureParams, cdfName=cdfName, cnAB=cnAB)
738
-  
738
+
739 739
 #  open(res[["A"]])
740 740
 #  open(res[["B"]])
741 741
 #  open(res[["zero"]])
... ...
@@ -796,7 +796,7 @@ crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
796 796
       open(res[["SNR"]])
797 797
       open(res[["mixtureParams"]])
798 798
     }
799
-    
799
+
800 800
 #    fD = featureData(XY)
801 801
 #    phenD = XY@phenoData
802 802
 #    protD = XY@protocolData
... ...
@@ -859,7 +859,7 @@ crlmmIllumina = function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
859 859
                      verbose=verbose,
860 860
                      returnParams=returnParams,
861 861
                      badSNP=badSNP)
862
-    
862
+
863 863
 #  res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
864 864
 #                  B=res[["B"]], # as.matrix(B(callSet)[snp.index,]),  # j]),
865 865
 #                  SNR=res[["SNR"]], # callSet$SNR, # [j],
... ...
@@ -923,7 +923,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
923 923
 
924 924
     if(missing(cdfName)) stop("must specify cdfName")
925 925
     if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
926
-  
926
+
927 927
     is.lds = ifelse(isPackageLoaded("ff"), TRUE, FALSE)
928 928
 
929 929
     RG = readIdatFiles(sampleSheet=sampleSheet, arrayNames=arrayNames,
... ...
@@ -937,9 +937,9 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
937 937
       delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero)
938 938
     }
939 939
     rm(RG); gc()
940
-  
940
+
941 941
     if (missing(sns)) { sns = sampleNames(XY)
942
-    } 
942
+    }
943 943
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
944 944
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
945 945
 #                               save.it=save.it, snpFile=snpFile, cnFile=cnFile)
... ...
@@ -952,7 +952,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
952 952
 
953 953
     if(row.names) row.names=res$gns else row.names=NULL
954 954
     if(col.names) col.names=res$sns else col.names=NULL
955
-  
955
+
956 956
     FUN = ifelse(is.lds, "crlmmGT2", "crlmmGT")
957 957
     ## genotyping
958 958
     crlmmGTfxn = function(FUN,...){
... ...
@@ -977,7 +977,7 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
977 977
                      verbose=verbose,
978 978
                      returnParams=returnParams,
979 979
                      badSNP=badSNP)
980
-  
980
+
981 981
     if(is.lds) {
982 982
       open(res[["SNR"]]); open(res[["SKW"]])
983 983
     }
... ...
@@ -1078,7 +1078,7 @@ construct.Illumina = function(sampleSheet=NULL,
1078 1078
        }
1079 1079
 
1080 1080
        narrays = length(arrayNames)
1081
-          
1081
+
1082 1082
        if(!missing(batch)) {
1083 1083
 		stopifnot(length(batch) == narrays)
1084 1084
        }
... ...
@@ -1132,7 +1132,7 @@ construct.Illumina = function(sampleSheet=NULL,
1132 1132
 	protocolData(cnSet) = protocolData
1133 1133
 	featureData(cnSet) = featureData
1134 1134
 	featureNames(cnSet) = featureNames(featureData)
1135
-	pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet)) 
1135
+	pd = data.frame(matrix(NA, nc, 3), row.names=sampleNames(cnSet))
1136 1136
 	colnames(pd)=c("SKW", "SNR", "gender")
1137 1137
 	phenoData(cnSet) = new("AnnotatedDataFrame", data=pd)
1138 1138
 	return(cnSet)
... ...
@@ -1175,7 +1175,7 @@ genotype.Illumina = function(sampleSheet=NULL,
1175 1175
         pkgname = getCrlmmAnnotationName(cdfName)
1176 1176
 	callSet = construct.Illumina(sampleSheet=sampleSheet, arrayNames=arrayNames,
1177 1177
 			     ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
1178
-                             highDensity=highDensity, sep=sep, fileExt=fileExt, 
1178
+                             highDensity=highDensity, sep=sep, fileExt=fileExt,
1179 1179
 			     cdfName=cdfName, copynumber=copynumber, verbose=verbose, batch=batch,
1180 1180
                              fns=fns, saveDate=saveDate)
1181 1181
         if(missing(sns)) sns = sampleNames(callSet)
... ...
@@ -1183,14 +1183,13 @@ genotype.Illumina = function(sampleSheet=NULL,
1183 1183
 	is.snp = isSnp(callSet)
1184 1184
 	snp.index = which(is.snp)
1185 1185
         narrays = ncol(callSet)
1186
-        
1187 1186
         if(is.lds) {
1188 1187
           sampleBatches = splitIndicesByNode(seq(along=sampleNames(callSet)))
1189 1188
 
1190 1189
           mixtureParams = initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1191 1190
           SNR = initializeBigVector("crlmmSNR-", narrays, "double")
1192 1191
           SKW = initializeBigVector("crlmmSKW-", narrays, "double")
1193
-          
1192
+
1194 1193
           ocLapply(sampleBatches, processIDAT, sampleSheet=sampleSheet, arrayNames=arrayNames,
1195 1194
                  ids=ids, path=path, arrayInfoColNames=arrayInfoColNames, highDensity=highDensity,
1196 1195
                  sep=sep, fileExt=fileExt, saveDate=saveDate, verbose=verbose, mixtureSampleSize=mixtureSampleSize,
... ...
@@ -1213,12 +1212,12 @@ genotype.Illumina = function(sampleSheet=NULL,
1213 1212
 
1214 1213
           XY = RGtoXY(RG, chipType=cdfName)
1215 1214
           rm(RG); gc()
1216
-        
1215
+
1217 1216
           res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1218 1217
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1219 1218
           rm(XY); gc()
1220 1219
           if(verbose) message("Finished preprocessing.")
1221
-          np.index = which(!is.snp)        
1220
+          np.index = which(!is.snp)
1222 1221
           A(callSet)[snp.index, ] = res[["A"]]
1223 1222
           B(callSet)[snp.index, ] = res[["B"]]
1224 1223
           if(length(np.index)>0) {
... ...
@@ -1257,7 +1256,7 @@ genotype.Illumina = function(sampleSheet=NULL,
1257 1256
           tmpA = A(callSet)[snp.index,]
1258 1257
           tmpB = B(callSet)[snp.index,]
1259 1258
         }
1260
-        
1259
+
1261 1260
 	tmp = crlmmGTfxn(FUN,
1262 1261
 			  A=tmpA,
1263 1262
 			  B=tmpB,
... ...
@@ -1308,7 +1307,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1308 1307
 			  highDensity=FALSE,
1309 1308
 			  sep="_",
1310 1309
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
1311
-			  saveDate=FALSE, 
1310
+			  saveDate=FALSE,
1312 1311
                           verbose=TRUE,
1313 1312
                           mixtureSampleSize=10^5,
1314 1313
 			  fitMixture=TRUE,
... ...
@@ -1319,7 +1318,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1319 1318
 			  stripNorm=TRUE,
1320 1319
 			  useTarget=TRUE,
1321 1320
                           A, B, SKW, SNR, mixtureParams, is.snp) {
1322
-  
1321
+
1323 1322
         if(length(path)>= length(sel)) path = path[sel]
1324 1323
         RG = readIdatFiles(sampleSheet=sampleSheet[sel,], arrayNames=arrayNames[sel],
1325 1324
                        ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
... ...
@@ -1330,7 +1329,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1330 1329
         delete(RG@assayData$R); delete(RG@assayData$G); delete(RG@assayData$zero); rm(RG)
1331 1330
         gc()
1332 1331
         if (missing(sns) || length(sns)!=ncol(XY)) sns = sampleNames(XY)
1333
-        
1332
+
1334 1333
         res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1335 1334
                                seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget)
1336 1335
         #                       save.it=save.it, snpFile=snpFile, cnFile=cnFile)
... ...
@@ -1339,7 +1338,7 @@ processIDAT =  function(sel, sampleSheet=NULL,
1339 1338
         gc()
1340 1339
 	if(verbose) message("Finished preprocessing.")
1341 1340
         snp.index = which(is.snp)
1342
-	np.index = which(!is.snp)      
1341
+	np.index = which(!is.snp)
1343 1342
 
1344 1343
         open(res[["A"]])
1345 1344
         open(res[["B"]])