Browse code

my .emacs wants to remove white spaces at end of lines. no substantive changes here

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/crlmm@49135 bc3139a8-67e5-0310-9ffc-ced21a209358

Rob Scharp authored on 30/08/2010 19:40:50
Showing2 changed files

... ...
@@ -12,187 +12,6 @@ readIdatFiles <- function(sampleSheet=NULL,
12 12
 			  sep="_",
13 13
 			  fileExt=list(green="Grn.idat", red="Red.idat"),
14 14
 			  saveDate=FALSE) {
15
-#       if(!is.null(arrayNames)) {
16
-#	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
17
-#	       if(!is.null(sampleSheet)) {
18
-#		       sampleSheet=NULL
19
-#		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
20
-#	       }
21
-#	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
22
-#       }
23
-       if(!is.null(arrayNames)) {
24
-               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
25
-       }
26
-       if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
27
-	       if(is.null(arrayNames)){
28
-		       ##arrayNames=NULL
29
-		       if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
30
-			       barcode = sampleSheet[,arrayInfoColNames$barcode]
31
-			       arrayNames=barcode
32
-		       }
33
-		       if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
34
-			       position = sampleSheet[,arrayInfoColNames$position]
35
-			       if(is.null(arrayNames))
36
-				       arrayNames=position
37
-			       else
38
-				       arrayNames = paste(arrayNames, position, sep=sep)
39
-			       if(highDensity) {
40
-				       hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
41
-				       for(i in names(hdExt))
42
-					       arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
43
-			       }
44
-		       }
45
-	       }
46
-	       pd = new("AnnotatedDataFrame", data = sampleSheet)
47
-	       sampleNames(pd) <- basename(arrayNames)
48
-       }
49
-       if(is.null(arrayNames)) {
50
-               arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
51
-               if(!is.null(sampleSheet)) {
52
-                      sampleSheet=NULL
53
-                      cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
54
-               }
55
-               pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
56
-       }
57
-       narrays = length(arrayNames)
58
-       grnfiles = paste(arrayNames, fileExt$green, sep=sep)
59
-       redfiles = paste(arrayNames, fileExt$red, sep=sep)
60
-       if(length(grnfiles)==0 || length(redfiles)==0)
61
-	       stop("Cannot find .idat files")
62
-       if(length(grnfiles)!=length(redfiles))
63
-	       stop("Cannot find matching .idat files")
64
-       if(path[1] != "."){
65
-	       grnidats = file.path(path, grnfiles)
66
-	       redidats = file.path(path, redfiles)
67
-       }  else {
68
-	       message("path arg not set.  Assuming files are in local directory")
69
-	       grnidats <- grnfiles
70
-	       redidats <- redfiles
71
-       }
72
-       if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
73
-       if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
74
-##       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
75
-##	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
76
-##		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
77
-##       }
78
-       headerInfo = list(nProbes = rep(NA, narrays),
79
-                         Barcode = rep(NA, narrays),
80
-                         ChipType = rep(NA, narrays),
81
-                         Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
82
-                         Position = rep(NA, narrays)) # this may also vary a bit
83
-       dates = list(decode=rep(NA, narrays),
84
-                    scan=rep(NA, narrays))
85
-       ## read in the data
86
-       for(i in seq(along=arrayNames)) {
87
-	       cat("reading", arrayNames[i], "\t")
88
-	       idsG = idsR = G = R = NULL
89
-	       cat(paste(sep, fileExt$green, sep=""), "\t")
90
-	       G = readIDAT(grnidats[i])
91
-	       idsG = rownames(G$Quants)
92
-	       headerInfo$nProbes[i] = G$nSNPsRead
93
-	       headerInfo$Barcode[i] = G$Barcode
94
-	       headerInfo$ChipType[i] = G$ChipType
95
-	       headerInfo$Manifest[i] = G$Unknown$MostlyNull
96
-	       headerInfo$Position[i] = G$Unknowns$MostlyA
97
-               if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
98
-                       ## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
99
-		       ## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
100
-		       ## but differed by a few SNPs for some reason - most of the chip was the same though
101
-		       ##           stop("Chips are not of all of the same type - please check your data")
102
-		       warning("Chips are not of the same type.  Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
103
-		       next()
104
-	       }
105
-	       dates$decode[i] = G$RunInfo[1, 1]
106
-	       dates$scan[i] = G$RunInfo[2, 1]
107
-	       if(i==1) {
108
-		       if(is.null(ids) && !is.null(G)){
109
-			       ids = idsG
110
-		       }else stop("Could not find probe IDs")
111
-		       nprobes = length(ids)
112
-		       narrays = length(arrayNames)
113
-		       tmpmat = matrix(NA, nprobes, narrays)
114
-		       rownames(tmpmat) = ids
115
-		       if(!is.null(sampleSheet)){
116
-			       colnames(tmpmat) = sampleSheet$Sample_ID
117
-		       }else  colnames(tmpmat) = arrayNames
118
-		       RG <- new("NChannelSet",
119
-				 R=tmpmat,
120
-				 G=tmpmat,
121
-				 zero=tmpmat,
122
-#				 Rnb=tmpmat,
123
-#				 Gnb=tmpmat,
124
-#				 Rse=tmpmat,
125
-#				 Gse=tmpmat,
126
-				 annotation=headerInfo$Manifest[1],
127
-				 phenoData=pd,
128
-				 storage.mode="environment")
129
-		       rm(tmpmat)
130
-		       gc()
131
-	       }
132
-	       if(length(ids)==length(idsG)) {
133
-		       if(sum(ids==idsG)==nprobes) {
134
-			       RG@assayData$G[,i] = G$Quants[, "Mean"]
135
-				   zeroG = G$Quants[, "NBeads"]==0
136
-#			       RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
137
-#			       RG@assayData$Gse[,i] = G$Quants[, "SD"]
138
-		       }
139
-	       } else {
140
-		       indG = match(ids, idsG)
141
-		       RG@assayData$G[,i] = G$Quants[indG, "Mean"]
142
-			   zeroG = G$Quants[indG, "NBeads"]==0
143
-#		       RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
144
-#		       RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
145
-	       }
146
-	       rm(G)
147
-	       gc()
148
-
149
-	       cat(paste(sep, fileExt$red, sep=""), "\n")
150
-	       R = readIDAT(redidats[i])
151
-	       idsR = rownames(R$Quants)
152
-
153
-	       if(length(ids)==length(idsG)) {
154
-		       if(sum(ids==idsR)==nprobes) {
155
-			       RG@assayData$R[,i] = R$Quants[ ,"Mean"]
156
-				   zeroR = R$Quants[ ,"NBeads"]==0
157
-#			       RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
158
-#			       RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
159
-		       }
160
-	       } else {
161
-		       indR = match(ids, idsR)
162
-		       RG@assayData$R[,i] = R$Quants[indR, "Mean"]
163
-			   zeroR = R$Quants[indR, "NBeads"]==0
164
-#		       RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
165
-#		       RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
166
-	       }
167
-		   RG@assayData$zero[,i] = zeroG | zeroR
168
-	       rm(R, zeroG, zeroR)
169
-	       gc()
170
-       }
171
-       if(saveDate) {
172
-	       protocolData(RG)[["ScanDate"]] = dates$scan
173
-       }
174
-       storageMode(RG) = "lockedEnvironment"
175
-       RG
176
-}
177
-
178
-
179
-readIdatFiles2 <- function(sampleSheet=NULL,
180
-			  arrayNames=NULL,
181
-			  ids=NULL,
182
-			  path=".",
183
-			  arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
184
-			  highDensity=FALSE,
185
-			  sep="_",
186
-			  fileExt=list(green="Grn.idat", red="Red.idat"),
187
-			  saveDate=FALSE) {
188
-#       if(!is.null(arrayNames)) {
189
-#	       arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
190
-#	       if(!is.null(sampleSheet)) {
191
-#		       sampleSheet=NULL
192
-#		       cat("Could not find required info in \'sampleSheet\' - ignoring.  Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
193
-#	       }
194
-#	       pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
195
-#       }
196 15
        if(!is.null(arrayNames)) {
197 16
                pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
198 17
        }
... ...
@@ -244,10 +63,6 @@ readIdatFiles2 <- function(sampleSheet=NULL,
244 63
        }
245 64
        if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
246 65
        if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
247
-##       if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
248
-##	       stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
249
-##		    paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
250
-##       }
251 66
        headerInfo = list(nProbes = rep(NA, narrays),
252 67
                          Barcode = rep(NA, narrays),
253 68
                          ChipType = rep(NA, narrays),
... ...
@@ -594,113 +409,6 @@ readBPM <- function(bpmFile){
594 409
 }
595 410
 
596 411
 
597
-RGtoXY = function(RG, chipType, verbose=TRUE) {
598
-  chipList = c("human1mv1c",             # 1M
599
-               "human370v1c",            # 370CNV
600
-               "human650v3a",            # 650Y
601
-               "human610quadv1b",        # 610 quad
602
-               "human660quadv1a",        # 660 quad
603
-               "human370quadv3c",        # 370CNV quad
604
-               "human550v3b",            # 550K
605
-               "human1mduov3b",          # 1M Duo
606
-               "humanomni1quadv1b",      # Omni1 quad
607
-               "humanomniexpress12v1b") # Omni express 12
608
-  if(missing(chipType)){
609
-	  chipType = match.arg(annotation(RG), chipList)
610
-  } else chipType = match.arg(chipType, chipList)
611
-
612
-  pkgname <- getCrlmmAnnotationName(chipType)
613
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
614
-     suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
615
-     msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
616
-     message(strwrap(msg))
617
-     stop("Package ", pkgname, " could not be found.")
618
-     rm(suggCall, msg)
619
-  }
620
-  if(verbose) message("Loading chip annotation information.")
621
-    loader("address.rda", .crlmmPkgEnv, pkgname)
622
-#    data(annotation, package=pkgname, envir=.crlmmPkgEnv)
623
-  aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
624
-  bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
625
-  ids <- names(aids)
626
-  snpbase <- getVarInEnv("base")
627
-
628
-  nsnps = length(aids)
629
-  narrays = ncol(RG)
630
-
631
-#  aidcol = match("AddressA_ID", colnames(annot))
632
-#  if(is.na(aidcol))
633
-#    aidcol = match("Address", colnames(annot))
634
-#  bidcol = match("AddressB_ID", colnames(annot))
635
-#  if(is.na(bidcol))
636
-#    bidcol = match("Address2", colnames(annot))
637
-#  aids = annot[, aidcol]
638
-#  bids = annot[, bidcol]
639
-#  snpids = annot[,"Name"]
640
-#  snpbase = annot[,"SNP"]
641
-  infI = !is.na(bids) & bids!=0
642
-  aord = match(aids, featureNames(RG)) # NAs are possible here
643
-  bord = match(bids, featureNames(RG)) # and here
644
-#  argrg = aids[rrgg]
645
-#  brgrg = bids[rrgg]
646
-
647
-  tmpmat = matrix(as.integer(0), nsnps, narrays)
648
-  rownames(tmpmat) = ids
649
-  colnames(tmpmat) = sampleNames(RG)
650
-  XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
651
-                 annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment")
652
-  rm(tmpmat)
653
-  gc()
654
-
655
-  # First sort out Infinium II SNPs, X -> R (allele A)  and Y -> G (allele B) from the same probe
656
-  XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
657
-  XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
658
-  XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
659
-#  XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
660
-#  XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
661
-#  XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
662
-#  XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
663
-  gc()
664
-
665
-  ## Warning - not 100% sure that the code below is correct - could be more complicated than this
666
-
667
-  # Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
668
-#  infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
669
-
670
-#  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
671
-#  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
672
-#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
673
-#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
674
-#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
675
-#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
676
-
677
-  # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
678
-#  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
679
-
680
-#  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
681
-#  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
682
-#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
683
-#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
684
-#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
685
-#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
686
-
687
-  #  For now zero out Infinium I probes
688
-  XY@assayData$X[infI,] = 0
689
-  XY@assayData$Y[infI,] = 0
690
-  XY@assayData$zero[infI,] = 0
691
-#  XY@assayData$Xnb[infI,] = 0
692
-#  XY@assayData$Ynb[infI,] = 0
693
-#  XY@assayData$Xse[infI,] = 0
694
-#  XY@assayData$Yse[infI,] = 0
695
-
696
-#  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
697
-  gc()
698
-
699
-#  storageMode(XY) = "lockedEnvironment"
700
-  XY
701
-}
702
-
703
-
704 412
 RGtoXY = function(RG, chipType, verbose=TRUE) {
705 413
   chipList = c("human1mv1c",             # 1M
706 414
                "human370v1c",            # 370CNV
... ...
@@ -777,31 +485,17 @@ RGtoXY = function(RG, chipType, verbose=TRUE) {
777 485
 
778 486
 #  X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
779 487
 #  Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
780
-#  Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
781
-#  Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
782
-#  Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
783
-#  Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
784 488
 
785 489
   # Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
786 490
 #  infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
787 491
 
788 492
 #  X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
789 493
 #  Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
790
-#  Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
791
-#  Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
792
-#  Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
793
-#  Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
794 494
 
795 495
   #  For now zero out Infinium I probes
796 496
   XY@assayData$X[infI,] = 0
797 497
   XY@assayData$Y[infI,] = 0
798 498
   XY@assayData$zero[infI,] = 0
799
-#  XY@assayData$Xnb[infI,] = 0
800
-#  XY@assayData$Ynb[infI,] = 0
801
-#  XY@assayData$Xse[infI,] = 0
802
-#  XY@assayData$Yse[infI,] = 0
803
-
804
-#  XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
805 499
   gc()
806 500
 
807 501
 #  if(class(XY@assayData$X)[1]=="ff_matrix") {
... ...
@@ -832,10 +526,6 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
832 526
   if(useTarget)
833 527
     targetdist = getVarInEnv("reference")
834 528
 
835
-#  Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
836
-#  colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
837
-#  rownames(Xqws) = rownames(Yqws) = featureNames(XY)
838
-
839 529
   if(verbose){
840 530
     message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
841 531
     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
... ...
@@ -875,146 +565,6 @@ stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
875 565
 
876 566
 
877 567
 preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
878
-				fitMixture=TRUE,
879
-				eps=0.1,
880
-				verbose=TRUE,
881
-				seed=1,
882
-				cdfName,
883
-				sns,
884
-				stripNorm=TRUE,
885
-				useTarget=TRUE,
886
-				save.it=FALSE,
887
-				snpFile,
888
-				cnFile) {
889
-  if(stripNorm)
890
-    XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
891
-
892
-## MR: the code below is mostly straight from snprma.R
893
-  if (missing(sns)) sns <- sampleNames(XY) #$X
894
-  if(missing(cdfName))
895
-    cdfName <- annotation(XY)
896
-##  stuffDir <- changeToCrlmmAnnotationName(cdfName)
897
-  pkgname <- getCrlmmAnnotationName(cdfName)
898
-  if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
899
-    suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
900
-    msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
901
-    message(strwrap(msg))
902
-    stop("Package ", pkgname, " could not be found.")
903
-    rm(suggCall, msg)
904
-  }
905
-  if(verbose) message("Loading snp annotation and mixture model parameters.")
906
-  loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
907
-  loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
908
-  loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
909
-  loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
910
-#  data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
911
-  autosomeIndex <- getVarInEnv("autosomeIndex")
912
-  #  pnsa <- getVarInEnv("pnsa")
913
-  #  pnsb <- getVarInEnv("pnsb")
914
-  #  fid <- getVarInEnv("fid")
915
-  #  reference <- getVarInEnv("reference")
916
-  #  aIndex <- getVarInEnv("aIndex")
917
-  #  bIndex <- getVarInEnv("bIndex")
918
-  SMEDIAN <- getVarInEnv("SMEDIAN")
919
-  theKnots <- getVarInEnv("theKnots")
920
-  gns <- getVarInEnv("gns")
921
-
922
-  # separate out copy number probes
923
-  npIndex = getVarInEnv("npProbesFid")
924
-  nprobes = length(npIndex)
925
-  if(length(nprobes)>0) {
926
-    narrays = ncol(XY)
927
-    A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
928
-    B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
929
-
930
-    # new lines below - useful to keep track of zeroed out probes
931
-    zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
932
-
933
-    colnames(A) <- colnames(B) <- colnames(zero) <- sns
934
-    rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
935
-
936
-    cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
937
-    if(save.it & !missing(cnFile)) {
938
-      t0 <- proc.time()
939
-      save(cnAB, file=cnFile)
940
-      t0 <- proc.time()-t0
941
-      if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
942
-    }
943
-    rm(cnAB, B, zero)
944
-  }
945
-
946
-  # next process snp probes
947
-  snpIndex = getVarInEnv("snpProbesFid")
948
-  nprobes <- length(snpIndex)
949
-
950
-  ##We will read each cel file, summarize, and run EM one by one
951
-  ##We will save parameters of EM to use later
952
-  mixtureParams <- matrix(0, 4, narrays)
953
-  SNR <- vector("numeric", narrays)
954
-  SKW <- vector("numeric", narrays)
955
-
956
-  ## This is the sample for the fitting of splines
957
-  ## BC: I like better the idea of the user passing the seed,
958
-  ##     because this might intefere with other analyses
959
-  ##     (like what happened to GCRMA)
960
-  set.seed(seed)
961
-  idx <- sort(sample(autosomeIndex, mixtureSampleSize))
962
-  idx2 <- sample(nprobes, 10^5)
963
-
964
-  ##S will hold (A+B)/2 and M will hold A-B
965
-  ##NOTE: We actually dont need to save S. Only for pics etc...
966
-  ##f is the correction. we save to avoid recomputing
967
-  A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
968
-  B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
969
-
970
-  # new lines below - useful to keep track of zeroed out SNPs
971
-  zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
972
-
973
-  colnames(A) <- colnames(B) <- colnames(zero) <- sns
974
-  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
975
-
976
-  if(verbose){
977
-     message("Calibrating ", narrays, " arrays.")
978
-     if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
979
-  }
980
-
981
-
982
-  for(i in 1:narrays){
983
-    SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
984
-    if(fitMixture){
985
-      S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
986
-      M <- log2(A[idx, i])-log2(B[idx, i])
987
-
988
-      ##we need to test the choice of eps.. it is not the max diff between funcs
989
-      tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
990
-
991
-      mixtureParams[, i] <- tmp[["coef"]]
992
-      SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
993
-    }
994
-    if(verbose) {
995
-       if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
996
-       else cat(".")
997
-    }
998
-  }
999
-  if (verbose) {
1000
-    if (getRversion() > '2.7.0') close(pb)
1001
-    else cat("\n")
1002
-  }
1003
-  if (!fitMixture) SNR <- mixtureParams <- NA
1004
-  ## gns comes from preprocStuff.rda
1005
-  res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
1006
-
1007
-  if(save.it & !missing(snpFile)) {
1008
-    t0 <- proc.time()
1009
-    save(res, file=snpFile)
1010
-    t0 <- proc.time()-t0
1011
-    if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
1012
-  }
1013
-  return(res)
1014
-}
1015
-
1016
-
1017
-preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1018 568
 				fitMixture=TRUE,
1019 569
 				eps=0.1,
1020 570
 				verbose=TRUE,
... ...
@@ -1088,9 +638,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1088 638
   mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
1089 639
   SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
1090 640
   SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
1091
-#  mixtureParams <- matrix(0, 4, narrays)
1092
-#  SNR <- vector("numeric", narrays)
1093
-#  SKW <- vector("numeric", narrays)
1094 641
 
1095 642
   ## This is the sample for the fitting of splines
1096 643
   ## BC: I like better the idea of the user passing the seed,
... ...
@@ -1103,25 +650,6 @@ preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
1103 650
   ##S will hold (A+B)/2 and M will hold A-B
1104 651
   ##NOTE: We actually dont need to save S. Only for pics etc...
1105 652
   ##f is the correction. we save to avoid recomputing
1106
-#  A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
1107
-#  B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
1108
-
1109
-  # new lines below - useful to keep track of zeroed out SNPs
1110
-#  zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
1111
-
1112
-#  if(!is.matrix(A)) {
1113
-#     A = A[,]
1114
-#     B = B[,]
1115
-#     zero = zero[,]
1116
-#  }
1117
-
1118
-#  if(!is.integer(A)) {
1119
-#     A = matrix(as.integer(A), nrow(A), ncol(A))
1120
-#     B = matrix(as.integer(B), nrow(B), ncol(B))
1121
-#  }
1122
-
1123
-#  colnames(A) <- colnames(B) <- colnames(zero) <- sns
1124
-#  rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
1125 653
 
1126 654
   A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
1127 655
   B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
... ...
@@ -1235,62 +763,6 @@ crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1235 763
     res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1236 764
                         seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
1237 765
                         save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1238
-  }else{
1239
-      if(verbose) message("Loading ", snpFile, ".")
1240
-        obj <- load(snpFile)
1241
-        if(verbose) message("Done.")
1242
-        if(!any(obj == "res"))
1243
-          stop("Object in ", snpFile, " seems to be invalid.")
1244
-  }
1245
-  if(row.names) row.names=res[["gns"]] else row.names=NULL
1246
-  if(col.names) col.names=res[["sns"]] else col.names=NULL
1247
-
1248
-  res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
1249
-                  res[["mixtureParams"]], res[["cdfName"]],
1250
-                  gender=gender, row.names=row.names,
1251
-                  col.names=col.names, recallMin=recallMin,
1252
-                  recallRegMin=1000, SNRMin=SNRMin,
1253
-                  returnParams=returnParams, badSNP=badSNP,
1254
-                  verbose=verbose)
1255
-
1256
-  res2[["SNR"]] <- res[["SNR"]]
1257
-  res2[["SKW"]] <- res[["SKW"]]
1258
-  rm(res)
1259
-  gc()
1260
-  return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
1261
-}
1262
-
1263
-
1264
-crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
1265
-                  row.names=TRUE, col.names=TRUE,
1266
-                  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1267
-                  seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
1268
-                  mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1269
-                  cdfName, sns, recallMin=10, recallRegMin=1000,
1270
-                  returnParams=FALSE, badSNP=.7) {
1271
-  if (save.it & (missing(snpFile) | missing(cnFile)))
1272
-    stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
1273
-  if (load.it & missing(snpFile))
1274
-    stop("'snpFile' is missing and you chose to load.it")
1275
-  if (!missing(snpFile))
1276
-    if (load.it & !file.exists(snpFile)){
1277
-      load.it <- FALSE
1278
-      message("File ", snpFile, " does not exist.")
1279
-      stop("Cannot load SNP data.")
1280
-  }
1281
-  if (!load.it){
1282
-    if(!missing(RG)) {
1283
-      if(missing(XY))
1284
-        XY = RGtoXY2(RG, chipType=cdfName)
1285
-      else
1286
-        stop("Both RG and XY specified - please use one or the other")
1287
-    }
1288
-    if (missing(sns)) sns <- sampleNames(XY) #$X
1289
-
1290
-    res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
1291
-                        seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
1292
-                      #  save.it=save.it, snpFile=snpFile, cnFile=cnFile)
1293
-
1294 766
 #    fD = featureData(XY)
1295 767
 #    phenD = XY@phenoData
1296 768
 #    protD = XY@protocolData
... ...
@@ -1378,13 +850,14 @@ crlmmIlluminaV2 = function(sampleSheet=NULL,
1378 850
 #                          rgFile,
1379 851
 			  stripNorm=TRUE,
1380 852
 			  useTarget=TRUE,
1381
-          	          row.names=TRUE,
853
+			  row.names=TRUE,
1382 854
 			  col.names=TRUE,
1383 855
 			  probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
1384 856
                           seed=1, save.it=FALSE, snpFile, cnFile,
1385 857
                           mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
1386 858
                           cdfName, sns, recallMin=10, recallRegMin=1000,
1387 859
                           returnParams=FALSE, badSNP=.7) {
860
+
1388 861
   if(missing(cdfName)) stop("must specify cdfName")
1389 862
   if(!isValidCdfName(cdfName)) stop("cdfName not valid.  see validCdfNames")
1390 863
 #  if(missing(sns)) sns <- basename(arrayNames)
... ...
@@ -318,161 +318,6 @@ setReplaceMethod("flags", signature=signature(object="CNSet", value="ff_matrix")
318 318
 })
319 319
 
320 320
 
321
-##setMethod("ellipse", "CNSet", function(x, copynumber, batch, ...){
322
-##	ellipse.CNSet(x, copynumber, batch, ...)
323
-##})
324
-
325
-ACN.X <- function(object, allele, i, j){
326
-	acn <- list()
327
-	batches <- unique(batch(object)[j])
328
-	for(k in seq_along(batches)){
329
-		l <- match(batches[k], bns)
330
-		nuA <- nuA(object)[ii, l]
331
-		nuB <- nuB(object)[ii, l]
332
-		phiA <- phiA(object)[ii, l]
333
-		phiB <- phiB(object)[ii, l]
334
-		phiPrimeA <- phiPrimeA(object)[ii, l]
335
-		phiPrimeB <- phiPrimeB(object)[ii, l]
336
-		if(all(is.na(phiPrimeA))){
337
-			I <- allele(object, allele)[ii, j]
338
-			if(allele=="A") acn[[k]] <- 1/phiA*(I - nuA)
339
-			if(allele=="B") acn[[k]] <- 1/phiB*(I - nuB)
340
-		} else {
341
-			A <- A(object)[ii, j]
342
-			B <- B(object)[ii, j]
343
-			phistar <- phiPrimeB/phiA
344
-			tmp <- (B-nuB - phistar*A + phistar*nuA)/phiB
345
-			cb <- tmp/(1-phistar*phiPrime/phiB)
346
-			ca <- A-nuA-phiPrimeA*cb/phiA
347
-			if(allele=="A") acn[[k]] <- ca
348
-			if(allele=="B") acn[[k]] <- cb
349
-		}
350
-	}
351
-<<<<<<< HEAD
352
-	return(acn)
353
-}
354
-=======
355
-	x
356
-})
357
-
358
-
359
-setMethod("lM", "CNSetLM", function(object) object@lM)
360
-setReplaceMethod("lM", c("CNSetLM", "list_or_ffdf"), function(object, value){
361
-	object@lM <- value
362
-	object
363
-})
364
-
365
-
366
-
367
-setMethod("open", "CNSetLM", function(con,...){
368
-	callNextMethod(con,...)
369
-	lapply(physical(lM(con)), open)
370
-})
371
-
372
-setAs("SnpSuperSet", "CNSetLM", function(from, to){
373
-	stopifnot("batch" %in% varLabels(protocolData(from)))
374
-	cnSet <- new("CNSetLM",
375
-		     alleleA=A(from),
376
-		     alleleB=B(from),
377
-		     call=snpCall(from),
378
-		     callProbability=snpCallProbability(from),
379
-		     CA=initializeBigMatrix("CA", nrow(from), ncol(from)),
380
-		     CB=initializeBigMatrix("CB", nrow(from), ncol(from)),
381
-		     annotation=annotation(from),
382
-		     featureData=featureData(from),
383
-		     experimentData=experimentData(from),
384
-		     protocolData=protocolData(from),
385
-		     phenoData=phenoData(from))
386
-	lM(cnSet) <- initializeParamObject(list(featureNames(cnSet), unique(protocolData(from)$batch)))
387
-	return(cnSet)
388
-})
389
-
390
-setMethod("computeCopynumber", "CNSet",
391
-	  function(object,
392
-		   MIN.OBS,
393
-		   DF.PRIOR,
394
-		   bias.adj,
395
-		   prior.prob,
396
-		   seed,
397
-		   verbose,
398
-		   GT.CONF.THR,
399
-		   PHI.THR,
400
-		   nHOM.THR,
401
-		   MIN.NU,
402
-		   MIN.PHI,
403
-		   THR.NU.PHI,
404
-		   thresholdCopynumber){
405
-	## to do the bias adjustment, initial estimates of the parameters are needed
406
-	##  The initial estimates are gotten by running computeCopynumber with cnOptions[["bias.adj"]]=FALSE
407
-		  cnOptions <- list(
408
-				    MIN.OBS=MIN.OBS,
409
-				    DF.PRIOR=DF.PRIOR,
410
-				    bias.adj=bias.adj,
411
-				    prior.prob=prior.prob,
412
-				    seed=seed,
413
-				    verbose=verbose,
414
-				    GT.CONF.THR=GT.CONF.THR,
415
-				    PHI.THR=PHI.THR,
416
-				    nHOM.THR=nHOM.THR,
417
-				    MIN.NU=MIN.NU,
418
-				    MIN.PHI=MIN.PHI,
419
-				    THR.NU.PHI=THR.NU.PHI,
420
-				    thresholdCopynumber=thresholdCopynumber)
421
-	bias.adj <- cnOptions[["bias.adj"]]
422
-	if(bias.adj & all(is.na(CA(object)))){
423
-		cnOptions[["bias.adj"]] <- FALSE
424
-	}
425
-	object <- computeCopynumber.CNSet(object, cnOptions)				
426
-	if(bias.adj & !cnOptions[["bias.adj"]]){
427
-		## Do a second iteration with bias adjustment
428
-		cnOptions[["bias.adj"]] <- TRUE
429
-		object <- computeCopynumber.CNSet(object, cnOptions)
430
-	}
431
-	object
432
-})
433
-setMethod("copyNumber", "CNSet", function(object){
434
-	I <- isSnp(object)
435
-	ffIsLoaded <- class(calls(object))[[1]]=="ff"
436
-	CA <- CA(object)
437
-	CB <- CB(object)
438
-	if(ffIsLoaded){
439
-		open(CA)
440
-		open(CB)
441
-		CA <- as.matrix(CA[,])
442
-		CB <- as.matrix(CB[,])
443
-	}
444
-	CN <- CA + CB
445
-	##For nonpolymorphic probes, CA is the total copy number
446
-	CN[!I, ] <- CA(object)[!I, ]
447
-	CN <- CN/100
448
-	CN
449
-})
450
-
451
-
452
-
453
-##setMethod("copyNumber", "CNSet", function(object){
454
-##	I <- isSnp(object)
455
-##	CA <- CA(object)
456
-##	CB <- CB(object)
457
-##	CN <- CA + CB
458
-##	##For nonpolymorphic probes, CA is the total copy number
459
-##	CN[!I, ] <- CA(object)[!I, ]
460
-##	CN
461
-##})
462
->>>>>>> Exporting list and ffdf classes so that lM<- assignment works
463
-
464
-ACN.SNP.autosome <- function(object, allele, i, j){
465
-
466
-}
467
-
468
-ACN.NP.autosome <- function(object, allele, i, j){
469
-
470
-}
471
-
472
-ACN.SNP.X <- function(object, allele, i, j){
473
-
474
-}
475
-
476 321
 ## allele A
477 322
 ##   autosome SNPs
478 323
 ##   autosome NPs
... ...
@@ -552,11 +397,7 @@ C3 <- function(object, allele, marker.index, batch.index, sample.index){
552 397
 	return(acn)
553 398
 }
554 399
 
555
-#C4 <- function(object, marker.index, batch.index, sample.index){
556
-#	bg <- nuB(object)[marker.index, batch.index]
557
-#	slope <- phiB(object)[marker.index, batch.index]
558
-#	I <- allele(object, allele)[marker.index, sample.index]
559
-#}
400
+
560 401
 
561 402
 
562 403
 ACN <- function(object, allele, i , j){