cd8b85b6 |
# function below works OK provided all .idat files are in the current working directory
# - could add an option to allow files in Illumina directory structure to be handled
# or to use the optional 'Path' column in sampleSheet
# - there is a lot of header information that is currently discarded - could try and store this somewhere in the resulting NChannelSet
|
5fcfed7d |
readIdatFiles <- function(sampleSheet=NULL,
arrayNames=NULL,
ids=NULL,
path=".",
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
highDensity=FALSE,
sep="_",
fileExt=list(green="Grn.idat", red="Red.idat"),
saveDate=FALSE) {
# if(!is.null(arrayNames)) {
# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
# if(!is.null(sampleSheet)) {
# sampleSheet=NULL
# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
# }
# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
# }
if(!is.null(arrayNames)) {
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
}
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
if(is.null(arrayNames)){
##arrayNames=NULL
if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
barcode = sampleSheet[,arrayInfoColNames$barcode]
arrayNames=barcode
}
|
56816837 |
if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
5fcfed7d |
position = sampleSheet[,arrayInfoColNames$position]
if(is.null(arrayNames))
arrayNames=position
else
arrayNames = paste(arrayNames, position, sep=sep)
if(highDensity) {
hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
for(i in names(hdExt))
arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
}
}
}
pd = new("AnnotatedDataFrame", data = sampleSheet)
|
56816837 |
sampleNames(pd) <- basename(arrayNames)
|
5fcfed7d |
}
if(is.null(arrayNames)) {
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
if(!is.null(sampleSheet)) {
sampleSheet=NULL
cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
}
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
}
|
56816837 |
|
cfdeb14b |
narrays = length(arrayNames)
|
cd8b85b6 |
grnfiles = paste(arrayNames, fileExt$green, sep=sep)
redfiles = paste(arrayNames, fileExt$red, sep=sep)
if(length(grnfiles)==0 || length(redfiles)==0)
|
64463948 |
stop("Cannot find .idat files")
|
cd8b85b6 |
if(length(grnfiles)!=length(redfiles))
|
64463948 |
stop("Cannot find matching .idat files")
|
bc16b510 |
if(path[1] != "."){
|
e4e0b214 |
grnidats = file.path(path, grnfiles)
redidats = file.path(path, redfiles)
} else {
|
5fcfed7d |
message("path arg not set. Assuming files are in local directory")
|
e4e0b214 |
grnidats <- grnfiles
redidats <- redfiles
|
2ae7850e |
}
|
d3a7556e |
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
56816837 |
if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
e4e0b214 |
## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
## }
|
cfdeb14b |
headerInfo = list(nProbes = rep(NA, narrays),
Barcode = rep(NA, narrays),
ChipType = rep(NA, narrays),
Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
Position = rep(NA, narrays)) # this may also vary a bit
|
5fcfed7d |
dates = list(decode=rep(NA, narrays),
scan=rep(NA, narrays))
## read in the data
|
cd8b85b6 |
for(i in seq(along=arrayNames)) {
|
64463948 |
cat("reading", arrayNames[i], "\t")
idsG = idsR = G = R = NULL
cat(paste(sep, fileExt$green, sep=""), "\t")
G = readIDAT(grnidats[i])
idsG = rownames(G$Quants)
headerInfo$nProbes[i] = G$nSNPsRead
headerInfo$Barcode[i] = G$Barcode
headerInfo$ChipType[i] = G$ChipType
headerInfo$Manifest[i] = G$Unknown$MostlyNull
headerInfo$Position[i] = G$Unknowns$MostlyA
|
8499df94 |
if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
|
64463948 |
## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
## but differed by a few SNPs for some reason - most of the chip was the same though
## stop("Chips are not of all of the same type - please check your data")
warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
next()
}
dates$decode[i] = G$RunInfo[1, 1]
dates$scan[i] = G$RunInfo[2, 1]
if(i==1) {
|
fe3e3f61 |
if(is.null(ids) && !is.null(G)){
|
64463948 |
ids = idsG
|
fe3e3f61 |
}else stop("Could not find probe IDs")
|
64463948 |
nprobes = length(ids)
narrays = length(arrayNames)
|
5fcfed7d |
tmpmat = matrix(NA, nprobes, narrays)
rownames(tmpmat) = ids
if(!is.null(sampleSheet)){
colnames(tmpmat) = sampleSheet$Sample_ID
}else colnames(tmpmat) = arrayNames
RG <- new("NChannelSet",
R=tmpmat,
G=tmpmat,
zero=tmpmat,
# Rnb=tmpmat,
# Gnb=tmpmat,
# Rse=tmpmat,
# Gse=tmpmat,
annotation=headerInfo$Manifest[1],
phenoData=pd,
storage.mode="environment")
rm(tmpmat)
|
64463948 |
gc()
}
if(length(ids)==length(idsG)) {
if(sum(ids==idsG)==nprobes) {
RG@assayData$G[,i] = G$Quants[, "Mean"]
|
5fcfed7d |
zeroG = G$Quants[, "NBeads"]==0
|
4e4601af |
# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
# RG@assayData$Gse[,i] = G$Quants[, "SD"]
|
64463948 |
}
|
fe3e3f61 |
} else {
|
64463948 |
indG = match(ids, idsG)
RG@assayData$G[,i] = G$Quants[indG, "Mean"]
|
5fcfed7d |
zeroG = G$Quants[indG, "NBeads"]==0
|
4e4601af |
# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
|
5fcfed7d |
# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
|
64463948 |
}
rm(G)
gc()
|
56816837 |
|
64463948 |
cat(paste(sep, fileExt$red, sep=""), "\n")
R = readIDAT(redidats[i])
idsR = rownames(R$Quants)
|
56816837 |
if(length(ids)==length(idsG)) {
|
64463948 |
if(sum(ids==idsR)==nprobes) {
RG@assayData$R[,i] = R$Quants[ ,"Mean"]
|
5fcfed7d |
zeroR = R$Quants[ ,"NBeads"]==0
|
4e4601af |
# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
|
64463948 |
}
|
fe3e3f61 |
} else {
|
64463948 |
indR = match(ids, idsR)
RG@assayData$R[,i] = R$Quants[indR, "Mean"]
|
5fcfed7d |
zeroR = R$Quants[indR, "NBeads"]==0
|
4e4601af |
# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
|
64463948 |
}
|
5fcfed7d |
RG@assayData$zero[,i] = zeroG | zeroR
|
4e4601af |
rm(R, zeroG, zeroR)
|
64463948 |
gc()
|
cd8b85b6 |
}
|
5fcfed7d |
if(saveDate) {
protocolData(RG)[["ScanDate"]] = dates$scan
}
storageMode(RG) = "lockedEnvironment"
RG
|
cd8b85b6 |
}
|
e609f78b |
readIdatFiles2 <- function(sampleSheet=NULL,
arrayNames=NULL,
ids=NULL,
path=".",
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
highDensity=FALSE,
sep="_",
fileExt=list(green="Grn.idat", red="Red.idat"),
saveDate=FALSE) {
# if(!is.null(arrayNames)) {
# arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
# if(!is.null(sampleSheet)) {
# sampleSheet=NULL
# cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
# }
# pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
# }
if(!is.null(arrayNames)) {
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
}
if(!is.null(sampleSheet)) { # get array info from Illumina's sample sheet
if(is.null(arrayNames)){
##arrayNames=NULL
if(!is.null(arrayInfoColNames$barcode) && (arrayInfoColNames$barcode %in% colnames(sampleSheet))) {
barcode = sampleSheet[,arrayInfoColNames$barcode]
arrayNames=barcode
}
|
56816837 |
if(!is.null(arrayInfoColNames$position) && (arrayInfoColNames$position %in% colnames(sampleSheet))) {
|
e609f78b |
position = sampleSheet[,arrayInfoColNames$position]
if(is.null(arrayNames))
arrayNames=position
else
arrayNames = paste(arrayNames, position, sep=sep)
if(highDensity) {
hdExt = list(A="R01C01", B="R01C02", C="R02C01", D="R02C02")
for(i in names(hdExt))
arrayNames = sub(paste(sep, i, sep=""), paste(sep, hdExt[[i]], sep=""), arrayNames)
}
}
}
pd = new("AnnotatedDataFrame", data = sampleSheet)
|
56816837 |
sampleNames(pd) <- basename(arrayNames)
|
e609f78b |
}
if(is.null(arrayNames)) {
arrayNames = gsub(paste(sep, fileExt$green, sep=""), "", dir(pattern=fileExt$green, path=path))
if(!is.null(sampleSheet)) {
sampleSheet=NULL
cat("Could not find required info in \'sampleSheet\' - ignoring. Check \'sampleSheet\' and/or \'arrayInfoColNames\'\n")
}
pd = new("AnnotatedDataFrame", data = data.frame(Sample_ID=arrayNames))
}
|
56816837 |
|
e609f78b |
narrays = length(arrayNames)
grnfiles = paste(arrayNames, fileExt$green, sep=sep)
redfiles = paste(arrayNames, fileExt$red, sep=sep)
if(length(grnfiles)==0 || length(redfiles)==0)
stop("Cannot find .idat files")
if(length(grnfiles)!=length(redfiles))
stop("Cannot find matching .idat files")
|
6ec3ec41 |
if(path[1] != "." & path[1] != ""){
|
e609f78b |
grnidats = file.path(path, grnfiles)
redidats = file.path(path, redfiles)
} else {
message("path arg not set. Assuming files are in local directory")
grnidats <- grnfiles
redidats <- redfiles
}
if(!all(file.exists(grnidats))) stop("Missing some of the *Grn.idat files")
|
56816837 |
if(!all(file.exists(redidats))) stop("Missing some of the *Red.idat files")
|
e609f78b |
## if(!all(c(redfiles,grnfiles) %in% dir(path=path))){
## stop("Missing .idat files: red\n", paste(redfiles[!(redfiles %in% dir(path=path))], sep=" "), "\n green\n",
## paste(grnfiles[!(grnfiles %in% dir(path=path))], sep=" "))
## }
headerInfo = list(nProbes = rep(NA, narrays),
Barcode = rep(NA, narrays),
ChipType = rep(NA, narrays),
Manifest = rep(NA, narrays), # not sure about this one - sometimes blank
Position = rep(NA, narrays)) # this may also vary a bit
dates = list(decode=rep(NA, narrays),
scan=rep(NA, narrays))
## read in the data
for(i in seq(along=arrayNames)) {
cat("reading", arrayNames[i], "\t")
idsG = idsR = G = R = NULL
cat(paste(sep, fileExt$green, sep=""), "\t")
G = readIDAT(grnidats[i])
idsG = rownames(G$Quants)
headerInfo$nProbes[i] = G$nSNPsRead
headerInfo$Barcode[i] = G$Barcode
headerInfo$ChipType[i] = G$ChipType
headerInfo$Manifest[i] = G$Unknown$MostlyNull
headerInfo$Position[i] = G$Unknowns$MostlyA
if(headerInfo$nProbes[i]>(headerInfo$nProbes[1]+10000) || headerInfo$nProbes[i]<(headerInfo$nProbes[1]-10000)) {
## headerInfo$ChipType[i]!=headerInfo$ChipType[1] || headerInfo$Manifest[i]!=headerInfo$Manifest[1]) {
## || headerInfo$nProbes[i]!=headerInfo$nProbes[1] ## removed this condition as some arrays used the same manifest
## but differed by a few SNPs for some reason - most of the chip was the same though
## stop("Chips are not of all of the same type - please check your data")
warning("Chips are not of the same type. Skipping ", basename(grnidats[i]), " and ", basename(redidats[i]))
next()
}
dates$decode[i] = G$RunInfo[1, 1]
dates$scan[i] = G$RunInfo[2, 1]
if(i==1) {
if(is.null(ids) && !is.null(G)){
ids = idsG
} else stop("Could not find probe IDs")
nprobes = length(ids)
narrays = length(arrayNames)
# tmpmat = matrix(NA, nprobes, narrays)
# rownames(tmpmat) = ids
# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
RG <- new("NChannelSet",
R=initializeBigMatrix(name="R", nr=nprobes, nc=narrays, vmode="integer"),
G=initializeBigMatrix(name="G", nr=nprobes, nc=narrays, vmode="integer"),
zero=initializeBigMatrix(name="zero", nr=nprobes, nc=narrays, vmode="integer"),
# featureData=fD,
# annotation=cdfName)
# R=tmpmat,
# G=tmpmat,
# zero=tmpmat,
# Rnb=tmpmat,
# Gnb=tmpmat,
# Rse=tmpmat,
# Gse=tmpmat,
annotation=headerInfo$Manifest[1],
phenoData=pd,
storage.mode="environment")
featureNames(RG) = ids
# rm(tmpmat)
if(!is.null(sampleSheet) && !is.null(sampleSheet$Sample_ID)){
sampleNames(RG) = sampleSheet$Sample_ID
} else sampleNames(RG) = arrayNames
gc()
}
if(length(ids)==length(idsG)) {
if(sum(ids==idsG)==nprobes) {
RG@assayData$G[,i] = G$Quants[, "Mean"]
zeroG = G$Quants[, "NBeads"]==0
# RG@assayData$Gnb[,i] = G$Quants[, "NBeads"]
# RG@assayData$Gse[,i] = G$Quants[, "SD"]
}
} else {
indG = match(ids, idsG)
RG@assayData$G[,i] = G$Quants[indG, "Mean"]
zeroG = G$Quants[indG, "NBeads"]==0
# RG@assayData$Gnb[,i] = G$Quants[indG, "NBeads"]
# RG@assayData$Gse[,i] = G$Quants[indG, "SD"]
}
rm(G)
gc()
|
56816837 |
|
e609f78b |
cat(paste(sep, fileExt$red, sep=""), "\n")
R = readIDAT(redidats[i])
idsR = rownames(R$Quants)
|
56816837 |
if(length(ids)==length(idsG)) {
|
e609f78b |
if(sum(ids==idsR)==nprobes) {
RG@assayData$R[,i] = R$Quants[ ,"Mean"]
zeroR = R$Quants[ ,"NBeads"]==0
# RG@assayData$Rnb[,i] = R$Quants[ ,"NBeads"]
# RG@assayData$Rse[,i] = R$Quants[ ,"SD"]
}
} else {
indR = match(ids, idsR)
RG@assayData$R[,i] = R$Quants[indR, "Mean"]
zeroR = R$Quants[indR, "NBeads"]==0
# RG@assayData$Rnb[,i] = R$Quants[indR, "NBeads"]
# RG@assayData$Rse[,i] = R$Quants[indR, "SD"]
}
RG@assayData$zero[,i] = zeroG | zeroR
rm(R, zeroG, zeroR)
gc()
}
if(saveDate) {
protocolData(RG)[["ScanDate"]] = dates$scan
}
storageMode(RG) = "lockedEnvironment"
RG
}
|
64463948 |
## the readIDAT() and readBPM() functions below were provided by Keith Baggerly, 27/8/2008
|
cd8b85b6 |
readIDAT <- function(idatFile){
fileSize <- file.info(idatFile)$size
tempCon <- file(idatFile,"rb")
prefixCheck <- readChar(tempCon,4)
if(prefixCheck != "IDAT"){
}
|
56816837 |
versionNumber <- readBin(tempCon, "integer", n=1, size=8,
|
cd8b85b6 |
endian="little", signed=FALSE)
|
56816837 |
nFields <- readBin(tempCon, "integer", n=1, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
fields <- matrix(0,nFields,3);
colnames(fields) <- c("Field Code", "Byte Offset", "Bytes")
for(i1 in 1:nFields){
|
56816837 |
fields[i1,"Field Code"] <-
|
cd8b85b6 |
readBin(tempCon, "integer", n=1, size=2, endian="little", signed=FALSE)
|
56816837 |
fields[i1,"Byte Offset"] <-
|
cd8b85b6 |
readBin(tempCon, "integer", n=1, size=8, endian="little", signed=FALSE)
}
knownCodes <- c(1000, 102, 103, 104, 107, 200, 300, 400,
401, 402, 403, 404, 405, 406, 407, 408, 409)
|
56816837 |
names(knownCodes) <-
|
cd8b85b6 |
c("nSNPsRead", # 1000
"IlluminaID", # 102
"SD", # 103
"Mean", # 104
"NBeads", # 107
"MidBlock", # 200
"RunInfo", # 300
"RedGreen", # 400
"MostlyNull", # 401
"Barcode", # 402
"ChipType", # 403
"MostlyA", # 404
"Unknown.1", # 405
"Unknown.2", # 406
"Unknown.3", # 407
"Unknown.4", # 408
"Unknown.5" # 409
)
nNewFields <- 1
rownames(fields) <- paste("Null", 1:nFields)
for(i1 in 1:nFields){
temp <- match(fields[i1,"Field Code"], knownCodes)
if(!is.na(temp)){
rownames(fields)[i1] <- names(knownCodes)[temp]
}else{
rownames(fields)[i1] <- paste("newField", nNewFields, sep=".")
nNewFields <- nNewFields + 1
}
}
seek(tempCon, fields["nSNPsRead", "Byte Offset"])
|
56816837 |
nSNPsRead <- readBin(tempCon, "integer", n=1, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
seek(tempCon, fields["IlluminaID", "Byte Offset"])
|
56816837 |
IlluminaID <- readBin(tempCon, "integer", n=nSNPsRead, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
seek(tempCon, fields["SD", "Byte Offset"])
|
56816837 |
SD <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
|
cd8b85b6 |
endian="little", signed=FALSE)
seek(tempCon, fields["Mean", "Byte Offset"])
|
56816837 |
Mean <- readBin(tempCon, "integer", n=nSNPsRead, size=2,
|
cd8b85b6 |
endian="little", signed=FALSE)
seek(tempCon, fields["NBeads", "Byte Offset"])
NBeads <- readBin(tempCon, "integer", n=nSNPsRead, size=1, signed=FALSE)
seek(tempCon, fields["MidBlock", "Byte Offset"])
|
56816837 |
nMidBlockEntries <- readBin(tempCon, "integer", n=1, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
|
56816837 |
MidBlock <- readBin(tempCon, "integer", n=nMidBlockEntries, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
seek(tempCon, fields["RunInfo", "Byte Offset"])
|
56816837 |
nRunInfoBlocks <- readBin(tempCon, "integer", n=1, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
RunInfo <- matrix(NA, nRunInfoBlocks, 5)
|
56816837 |
colnames(RunInfo) <- c("RunTime", "BlockType", "BlockPars",
|
cd8b85b6 |
"BlockCode", "CodeVersion")
|
b938db26 |
for(i1 in 1:2) { #nRunInfoBlocks){ ## MR edit
|
cd8b85b6 |
for(i2 in 1:5){
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
RunInfo[i1,i2] <- readChar(tempCon, nChars)
}
}
seek(tempCon, fields["RedGreen", "Byte Offset"])
|
56816837 |
RedGreen <- readBin(tempCon, "numeric", n=1, size=4,
|
cd8b85b6 |
endian="little", signed=FALSE)
|
56816837 |
#RedGreen <- readBin(tempCon, "integer", n=4, size=1,
|
cd8b85b6 |
# endian="little", signed=FALSE)
seek(tempCon, fields["MostlyNull", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
MostlyNull <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Barcode", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Barcode <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["ChipType", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
ChipType <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["MostlyA", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
MostlyA <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Unknown.1", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Unknown.1 <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Unknown.2", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Unknown.2 <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Unknown.3", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Unknown.3 <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Unknown.4", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Unknown.4 <- readChar(tempCon, nChars)
|
cd8b85b6 |
seek(tempCon, fields["Unknown.5", "Byte Offset"])
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
Unknown.5 <- readChar(tempCon, nChars)
|
cd8b85b6 |
close(tempCon)
|
56816837 |
Unknowns <-
|
cd8b85b6 |
list(MostlyNull=MostlyNull,
MostlyA=MostlyA,
Unknown.1=Unknown.1,
Unknown.2=Unknown.2,
Unknown.3=Unknown.3,
Unknown.4=Unknown.4,
Unknown.5=Unknown.5)
Quants <- cbind(Mean, SD, NBeads)
colnames(Quants) <- c("Mean", "SD", "NBeads")
rownames(Quants) <- as.character(IlluminaID)
|
56816837 |
idatValues <-
list(fileSize=fileSize,
versionNumber=versionNumber,
nFields=nFields,
|
cd8b85b6 |
fields=fields,
|
56816837 |
nSNPsRead=nSNPsRead,
#IlluminaID=IlluminaID,
#SD=SD,
#Mean=Mean,
|
cd8b85b6 |
#NBeads=NBeads,
Quants=Quants,
|
56816837 |
MidBlock=MidBlock,
RunInfo=RunInfo,
RedGreen=RedGreen,
Barcode=Barcode,
|
cd8b85b6 |
ChipType=ChipType,
Unknowns=Unknowns)
idatValues
}
readBPM <- function(bpmFile){
## Reads and parses Illumina BPM files
|
56816837 |
|
cd8b85b6 |
fileSize <- file.info(bpmFile)$size
tempCon <- file(bpmFile,"rb")
# The first few bytes of the egtFile are some type of
|
56816837 |
# header, but there's no related byte offset information.
|
cd8b85b6 |
prefixCheck <- readChar(tempCon,3) ## should be "BPM"
null.1 <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
|
56816837 |
## should be 1
versionNumber <-
|
cd8b85b6 |
readBin(tempCon, "integer", n=1, size=4, endian="little", signed=FALSE)
## should be 4
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
chipType <- readChar(tempCon, nChars)
null.2 <- readBin(tempCon, "integer", n=2, size=1, signed=FALSE)
csvLines <- readLines(tempCon, 22)
entriesByteOffset <- seek(tempCon);
nEntries <- readBin(tempCon, "integer", n=1, size=4,
endian="little", signed=FALSE)
|
56816837 |
|
cd8b85b6 |
if(FALSE){
|
56816837 |
|
cd8b85b6 |
snpIndexByteOffset <- seek(tempCon)
snpIndex <- readBin(tempCon, "integer", n=nEntries, size=4,
endian="little", signed=FALSE)
## for the 1M array, these are simply in order from 1 to 1072820.
|
56816837 |
|
cd8b85b6 |
snpNamesByteOffset <- seek(tempCon)
snpNames <- rep("A", nEntries)
for(i1 in 1:nEntries){
nChars <- readBin(tempCon, "integer", n=1, size=1, signed=FALSE)
snpNames[i1] <- readChar(tempCon, nChars)
}
}
seek(tempCon, 15278138)
|
56816837 |
|
cd8b85b6 |
normIDByteOffset <- seek(tempCon)
normID <- readBin(tempCon, "integer", n=nEntries, size=1, signed=FALSE) + 1
|
56816837 |
|
cd8b85b6 |
newBlockByteOffset <- seek(tempCon)
newBlock <- readBin(tempCon, "integer", n=10000, size=1, signed=FALSE)
|
56816837 |
|
cd8b85b6 |
close(tempCon)
byteOffsets <- list(entriesByteOffset=entriesByteOffset,
|
56816837 |
#snpIndexByteOffset=snpIndexByteOffset,
|
cd8b85b6 |
#snpNamesByteOffset=snpNamesByteOffset,
normIDByteOffset=normIDByteOffset,
newBlockByteOffset=newBlockByteOffset)
|
56816837 |
|
cd8b85b6 |
allStuff <- list(prefixCheck=prefixCheck,
null.1=null.1,
versionNumber=versionNumber,
chipType=chipType,
null.2=null.2,
csvLines=csvLines,
nEntries=nEntries,
#snpIndex=snpIndex,
#snpNames=snpNames,
normID=normID,
newBlock=newBlock,
byteOffsets=byteOffsets)
allStuff
|
56816837 |
|
cd8b85b6 |
}
|
5fcfed7d |
RGtoXY = function(RG, chipType, verbose=TRUE) {
chipList = c("human1mv1c", # 1M
"human370v1c", # 370CNV
"human650v3a", # 650Y
"human610quadv1b", # 610 quad
"human660quadv1a", # 660 quad
"human370quadv3c", # 370CNV quad
"human550v3b", # 550K
"human1mduov3b", # 1M Duo
|
56816837 |
"humanomni1quadv1b", # Omni1 quad
"humanomniexpress12v1b") # Omni express 12
|
5fcfed7d |
if(missing(chipType)){
chipType = match.arg(annotation(RG), chipList)
} else chipType = match.arg(chipType, chipList)
|
56816837 |
|
5fcfed7d |
pkgname <- getCrlmmAnnotationName(chipType)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading chip annotation information.")
loader("address.rda", .crlmmPkgEnv, pkgname)
# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
ids <- names(aids)
snpbase <- getVarInEnv("base")
|
56816837 |
|
5fcfed7d |
nsnps = length(aids)
narrays = ncol(RG)
|
56816837 |
|
5fcfed7d |
# aidcol = match("AddressA_ID", colnames(annot))
# if(is.na(aidcol))
# aidcol = match("Address", colnames(annot))
# bidcol = match("AddressB_ID", colnames(annot))
|
56816837 |
# if(is.na(bidcol))
|
5fcfed7d |
# bidcol = match("Address2", colnames(annot))
# aids = annot[, aidcol]
# bids = annot[, bidcol]
# snpids = annot[,"Name"]
|
56816837 |
# snpbase = annot[,"SNP"]
infI = !is.na(bids) & bids!=0
|
5fcfed7d |
aord = match(aids, featureNames(RG)) # NAs are possible here
bord = match(bids, featureNames(RG)) # and here
# argrg = aids[rrgg]
# brgrg = bids[rrgg]
|
a11efc9c |
tmpmat = matrix(as.integer(0), nsnps, narrays)
|
5fcfed7d |
rownames(tmpmat) = ids
colnames(tmpmat) = sampleNames(RG)
XY = new("NChannelSet", X=tmpmat, Y=tmpmat, zero=tmpmat, # Xnb=tmpmat, Ynb=tmpmat, Xse=tmpmat, Yse=tmpmat, zero=tmpmat,
annotation=chipType, phenoData=RG@phenoData, protocolData=RG@protocolData, storage.mode="environment")
rm(tmpmat)
gc()
|
56816837 |
|
5fcfed7d |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
gc()
|
56816837 |
|
5fcfed7d |
## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
56816837 |
|
5fcfed7d |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
|
56816837 |
|
5fcfed7d |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
56816837 |
|
5fcfed7d |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
56816837 |
|
5fcfed7d |
# For now zero out Infinium I probes
XY@assayData$X[infI,] = 0
XY@assayData$Y[infI,] = 0
|
56816837 |
XY@assayData$zero[infI,] = 0
|
5fcfed7d |
# XY@assayData$Xnb[infI,] = 0
# XY@assayData$Ynb[infI,] = 0
# XY@assayData$Xse[infI,] = 0
# XY@assayData$Yse[infI,] = 0
# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
gc()
# storageMode(XY) = "lockedEnvironment"
XY
|
cd8b85b6 |
}
|
e609f78b |
RGtoXY2 = function(RG, chipType, verbose=TRUE) {
|
56816837 |
chipList = c("human1mv1c", # 1M
|
e609f78b |
"human370v1c", # 370CNV
"human650v3a", # 650Y
"human610quadv1b", # 610 quad
"human660quadv1a", # 660 quad
"human370quadv3c", # 370CNV quad
"human550v3b", # 550K
"human1mduov3b", # 1M Duo
|
56816837 |
"humanomni1quadv1b", # Omni1 quad
"humanomniexpress12v1b") # Omni express 12
|
e609f78b |
if(missing(chipType)){
chipType = match.arg(annotation(RG), chipList)
} else chipType = match.arg(chipType, chipList)
|
56816837 |
|
e609f78b |
pkgname <- getCrlmmAnnotationName(chipType)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading chip annotation information.")
loader("address.rda", .crlmmPkgEnv, pkgname)
# data(annotation, package=pkgname, envir=.crlmmPkgEnv)
aids <- getVarInEnv("addressA") # comes from AddressA_ID or Address column in manifest
bids <- getVarInEnv("addressB") # comes from AddressB_ID or Address2 column in manifest
ids <- names(aids)
snpbase <- getVarInEnv("base")
|
56816837 |
|
e609f78b |
nsnps = length(aids)
narrays = ncol(RG)
|
56816837 |
|
e609f78b |
# aidcol = match("AddressA_ID", colnames(annot))
# if(is.na(aidcol))
# aidcol = match("Address", colnames(annot))
# bidcol = match("AddressB_ID", colnames(annot))
|
56816837 |
# if(is.na(bidcol))
|
e609f78b |
# bidcol = match("Address2", colnames(annot))
# aids = annot[, aidcol]
# bids = annot[, bidcol]
# snpids = annot[,"Name"]
|
56816837 |
# snpbase = annot[,"SNP"]
infI = !is.na(bids) & bids!=0
|
e609f78b |
aord = match(aids, featureNames(RG)) # NAs are possible here
bord = match(bids, featureNames(RG)) # and here
# argrg = aids[rrgg]
# brgrg = bids[rrgg]
# fD <- new("AnnotatedDataFrame", data=data.frame(row.names=ids))##, varMetadata=data.frame(labelDescript
XY <- new("NChannelSet",
X=initializeBigMatrix(name="X", nr=nsnps, nc=narrays, vmode="integer"),
Y=initializeBigMatrix(name="Y", nr=nsnps, nc=narrays, vmode="integer"),
zero=initializeBigMatrix(name="zero", nr=nsnps, nc=narrays, vmode="integer"),
annotation=chipType, phenoData=RG@phenoData, # featureData=fD
protocolData=RG@protocolData, storage.mode="environment")
featureNames(XY) = ids # featureNames(RG)
gc()
|
a11efc9c |
# Need to initialize - matrices filled with NAs to begin with
XY@assayData$X[1:nsnps,] = 0
XY@assayData$Y[1:nsnps,] = 0
XY@assayData$zero[1:nsnps,] = 0
|
56816837 |
|
e609f78b |
# First sort out Infinium II SNPs, X -> R (allele A) and Y -> G (allele B) from the same probe
XY@assayData$X[!is.na(aord),] = exprs(channel(RG, "R"))[aord[!is.na(aord)],] # mostly red
XY@assayData$Y[!is.na(aord),] = exprs(channel(RG, "G"))[aord[!is.na(aord)],] # mostly green
XY@assayData$zero[!is.na(aord),] = exprs(channel(RG, "zero"))[aord[!is.na(aord)],] # mostly green
# XY@assayData$Xnb[!is.na(aord),] = exprs(channel(RG, "Rnb"))[aord[!is.na(aord)],]
# XY@assayData$Ynb[!is.na(aord),] = exprs(channel(RG, "Gnb"))[aord[!is.na(aord)],]
# XY@assayData$Xse[!is.na(aord),] = exprs(channel(RG, "Rse"))[aord[!is.na(aord)],]
# XY@assayData$Yse[!is.na(aord),] = exprs(channel(RG, "Gse"))[aord[!is.na(aord)],]
gc()
|
56816837 |
|
e609f78b |
## Warning - not 100% sure that the code below is correct - could be more complicated than this
|
56816837 |
|
e609f78b |
# Next Infinium I where X -> R from allele A probe and Y -> R from allele B probe
# infIRR = infI & (snpbase=="[A/T]" | snpbase=="[T/A]" | snpbase=="[a/t]" | snpbase=="[t/a]")
|
56816837 |
|
e609f78b |
# X[infIRR,] = exprs(channel(RG, "R"))[aord[infIRR],] # mostly red
# Y[infIRR,] = exprs(channel(RG, "R"))[bord[infIRR],] # mostly green
# Xnb[infIRR,] = exprs(channel(RG, "Rnb"))[aord[infIRR],]
# Ynb[infIRR,] = exprs(channel(RG, "Rnb"))[bord[infIRR],]
# Xse[infIRR,] = exprs(channel(RG, "Rse"))[aord[infIRR],]
# Yse[infIRR,] = exprs(channel(RG, "Rse"))[bord[infIRR],]
|
56816837 |
|
e609f78b |
# Finally Infinium I where X -> G from allele A probe and Y -> G from allele B probe
# infIGG = infI & (snpbase=="[C/G]" | snpbase=="[G/C]" | snpbase=="[g/c]" | snpbase=="[c/g]")
# X[infIGG,] = exprs(channel(RG, "G"))[aord[infIGG],] # mostly red
# Y[infIGG,] = exprs(channel(RG, "G"))[bord[infIGG],] # mostly green
# Xnb[infIGG,] = exprs(channel(RG, "Gnb"))[aord[infIGG],]
# Ynb[infIGG,] = exprs(channel(RG, "Gnb"))[bord[infIGG],]
# Xse[infIGG,] = exprs(channel(RG, "Gse"))[aord[infIGG],]
# Yse[infIGG,] = exprs(channel(RG, "Gse"))[bord[infIGG],]
|
56816837 |
|
e609f78b |
# For now zero out Infinium I probes
XY@assayData$X[infI,] = 0
XY@assayData$Y[infI,] = 0
|
56816837 |
XY@assayData$zero[infI,] = 0
|
e609f78b |
# XY@assayData$Xnb[infI,] = 0
# XY@assayData$Ynb[infI,] = 0
# XY@assayData$Xse[infI,] = 0
# XY@assayData$Yse[infI,] = 0
# XY@assayData$zero[XY@assayData$X==0 | XY@assayData$Y==0] = 1
gc()
# storageMode(XY) = "lockedEnvironment"
XY
}
|
cd8b85b6 |
stripNormalize = function(XY, useTarget=TRUE, verbose=TRUE) {
|
5fcfed7d |
pkgname <- getCrlmmAnnotationName(annotation(XY))
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading strip and reference normalization information.")
loader("preprocStuff.rda", .crlmmPkgEnv, pkgname)
# data(preprocStuff, package=pkgname, envir=.crlmmPkgEnv)
stripnum <- getVarInEnv("stripnum")
|
56816837 |
|
5fcfed7d |
if(useTarget)
targetdist = getVarInEnv("reference")
|
56816837 |
|
5fcfed7d |
# Xqws = Yqws = matrix(0, nrow(XY), ncol(XY))
# colnames(Xqws) = colnames(Yqws) = sampleNames(XY) #$X
# rownames(Xqws) = rownames(Yqws) = featureNames(XY)
if(verbose){
message("Quantile normalizing ", ncol(XY), " arrays by ", max(stripnum), " strips.")
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=max(stripnum), style=3)
|
56816837 |
}
|
5fcfed7d |
for(s in 1:max(stripnum)) {
if(verbose) {
if (getRversion() > '2.7.0') setTxtProgressBar(pb, s)
else cat(".")
}
sel = stripnum==s
subX = exprs(channel(XY, "X"))[sel,]
subY = exprs(channel(XY, "Y"))[sel,]
if(useTarget)
tmp = normalize.quantiles.use.target(as.matrix(cbind(subX, subY)), targetdist[[s]])
else
tmp = normalize.quantiles(as.matrix(cbind(subX, subY)))
|
a11efc9c |
XY@assayData$X[sel,] = matrix(as.integer(tmp[,1:(ncol(tmp)/2)]+16), nrow(tmp), ncol(tmp)/2)
XY@assayData$Y[sel,] = matrix(as.integer(tmp[,(ncol(tmp)/2+1):ncol(tmp)]+16), nrow(tmp), ncol(tmp)/2)
|
5fcfed7d |
# Xqws[sel,] = tmp[,1:(ncol(tmp)/2)]
# Yqws[sel,] = tmp[,(ncol(tmp)/2+1):ncol(tmp)]
rm(subX, subY, tmp, sel)
gc()
}
if(verbose)
cat("\n")
XY
# XYNorm = new("NChannelSet",
# X=Xqws+16,
# Y=Yqws+16,
# Xnb=exprs(channel(XY, "Xnb")),
# Ynb=exprs(channel(XY, "Ynb")),
# Xse=exprs(channel(XY, "Xse")),
# Yse=exprs(channel(XY, "Yse")),
# zero=exprs(channel(XY, "zero")),
# annotation=annotation(XY),
# phenoData=XY@phenoData)
|
cd8b85b6 |
}
|
5fcfed7d |
preprocessInfinium2 <- function(XY, mixtureSampleSize=10^5,
fitMixture=TRUE,
eps=0.1,
verbose=TRUE,
seed=1,
cdfName,
sns,
stripNorm=TRUE,
useTarget=TRUE,
save.it=FALSE,
snpFile,
cnFile) {
if(stripNorm)
XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
## MR: the code below is mostly straight from snprma.R
if (missing(sns)) sns <- sampleNames(XY) #$X
if(missing(cdfName))
cdfName <- annotation(XY)
## stuffDir <- changeToCrlmmAnnotationName(cdfName)
pkgname <- getCrlmmAnnotationName(cdfName)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading snp annotation and mixture model parameters.")
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
autosomeIndex <- getVarInEnv("autosomeIndex")
# pnsa <- getVarInEnv("pnsa")
|
cd8b85b6 |
# pnsb <- getVarInEnv("pnsb")
# fid <- getVarInEnv("fid")
# reference <- getVarInEnv("reference")
# aIndex <- getVarInEnv("aIndex")
# bIndex <- getVarInEnv("bIndex")
|
5fcfed7d |
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
gns <- getVarInEnv("gns")
narrays = ncol(XY)
|
56816837 |
|
5fcfed7d |
if(save.it & !missing(cnFile)) {
|
d6564036 |
## separate out copy number probes
npIndex = getVarInEnv("npProbesFid")
nprobes = length(npIndex)
if(length(nprobes > 0)){
|
348dfe40 |
is.ff <- is(XY@assayData$X, "ff") | is(XY@assayData$X, "ffdf")
if(is.ff){
open(XY@assayData$X)
open(XY@assayData$Y)
open(XY@assayData$zero)
}
|
d6564036 |
A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
## new lines below - useful to keep track of zeroed out probes
zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
colnames(A) <- colnames(B) <- colnames(zero) <- sns
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
t0 <- proc.time()
save(cnAB, file=cnFile)
t0 <- proc.time()-t0
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
rm(cnAB, B, zero)
}
|
5fcfed7d |
}
|
56816837 |
|
5fcfed7d |
# next process snp probes
snpIndex = getVarInEnv("snpProbesFid")
nprobes <- length(snpIndex)
|
56816837 |
|
5fcfed7d |
##We will read each cel file, summarize, and run EM one by one
##We will save parameters of EM to use later
mixtureParams <- matrix(0, 4, narrays)
SNR <- vector("numeric", narrays)
SKW <- vector("numeric", narrays)
## This is the sample for the fitting of splines
## BC: I like better the idea of the user passing the seed,
## because this might intefere with other analyses
## (like what happened to GCRMA)
set.seed(seed)
idx <- sort(sample(autosomeIndex, mixtureSampleSize))
|
a11efc9c |
idx2 <- sample(nprobes, 10^5)
|
56816837 |
|
5fcfed7d |
##S will hold (A+B)/2 and M will hold A-B
##NOTE: We actually dont need to save S. Only for pics etc...
##f is the correction. we save to avoid recomputing
A <- matrix(as.integer(exprs(channel(XY, "X"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
B <- matrix(as.integer(exprs(channel(XY, "Y"))[snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
# new lines below - useful to keep track of zeroed out SNPs
zero <- matrix(as.integer(exprs(channel(XY, "zero"))[snpIndex,]), nprobes, narrays)
|
56816837 |
|
5fcfed7d |
colnames(A) <- colnames(B) <- colnames(zero) <- sns
rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
56816837 |
|
5fcfed7d |
if(verbose){
message("Calibrating ", narrays, " arrays.")
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
}
|
a11efc9c |
|
5fcfed7d |
for(i in 1:narrays){
SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
if(fitMixture){
S <- (log2(A[idx, i])+log2(B[idx, i]))/2 - SMEDIAN
M <- log2(A[idx, i])-log2(B[idx, i])
##we need to test the choice of eps.. it is not the max diff between funcs
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
mixtureParams[, i] <- tmp[["coef"]]
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
}
if(verbose) {
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
else cat(".")
}
}
if (verbose) {
if (getRversion() > '2.7.0') close(pb)
else cat("\n")
}
if (!fitMixture) SNR <- mixtureParams <- NA
## gns comes from preprocStuff.rda
res = list(A=A, B=B, zero=zero, sns=sns, gns=gns, SNR=SNR, SKW=SKW, mixtureParams=mixtureParams, cdfName=cdfName)
if(save.it & !missing(snpFile)) {
|
56816837 |
t0 <- proc.time()
save(res, file=snpFile)
|
5fcfed7d |
t0 <- proc.time()-t0
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
}
return(res)
|
cd8b85b6 |
}
|
e609f78b |
preprocessInfinium2v2 <- function(XY, mixtureSampleSize=10^5,
fitMixture=TRUE,
eps=0.1,
verbose=TRUE,
seed=1,
cdfName,
sns,
stripNorm=TRUE,
|
a11efc9c |
useTarget=TRUE, # ) { #,
save.it=FALSE,
snpFile,
cnFile) {
|
e609f78b |
if(stripNorm)
XY = stripNormalize(XY, useTarget=useTarget, verbose=verbose)
## MR: the code below is mostly straight from snprma.R
if (missing(sns)) sns <- sampleNames(XY) #$X
if(missing(cdfName))
cdfName <- annotation(XY)
## stuffDir <- changeToCrlmmAnnotationName(cdfName)
pkgname <- getCrlmmAnnotationName(cdfName)
if(!require(pkgname, character.only=TRUE, quietly=!verbose)){
suggCall <- paste("library(", pkgname, ", lib.loc='/Altern/Lib/Loc')", sep="")
msg <- paste("If", pkgname, "is installed on an alternative location, please load it manually by using", suggCall)
message(strwrap(msg))
stop("Package ", pkgname, " could not be found.")
rm(suggCall, msg)
}
if(verbose) message("Loading snp annotation and mixture model parameters.")
loader("genotypeStuff.rda", .crlmmPkgEnv, pkgname)
loader("mixtureStuff.rda", .crlmmPkgEnv, pkgname)
loader("snpProbesFid.rda", .crlmmPkgEnv, pkgname)
loader("npProbesFid.rda", .crlmmPkgEnv, pkgname)
# data(genotypeStuff, mixtureStuff, package=pkgname, envir=.crlmmPkgEnv)
autosomeIndex <- getVarInEnv("autosomeIndex")
# pnsa <- getVarInEnv("pnsa")
# pnsb <- getVarInEnv("pnsb")
# fid <- getVarInEnv("fid")
# reference <- getVarInEnv("reference")
# aIndex <- getVarInEnv("aIndex")
# bIndex <- getVarInEnv("bIndex")
SMEDIAN <- getVarInEnv("SMEDIAN")
theKnots <- getVarInEnv("theKnots")
|
a11efc9c |
# gns <- featureNames(XY) # getVarInEnv("gns") # needs to include np probes - gns is only for snps
|
e609f78b |
narrays = ncol(XY)
|
56816837 |
|
a11efc9c |
if(save.it & !missing(cnFile)) {
# separate out copy number probes
npIndex = getVarInEnv("npProbesFid")
nprobes = length(npIndex)
A <- matrix(as.integer(exprs(channel(XY, "X"))[npIndex,]), nprobes, narrays)
B <- matrix(as.integer(exprs(channel(XY, "Y"))[npIndex,]), nprobes, narrays)
|
e609f78b |
|
a11efc9c |
# new lines below - useful to keep track of zeroed out probes
|
56816837 |
zero <- matrix(as.integer(exprs(channel(XY, "zero"))[npIndex,]), nprobes, narrays)
|
e609f78b |
|
a11efc9c |
colnames(A) <- colnames(B) <- colnames(zero) <- sns
rownames(A) <- rownames(B) <- rownames(zero) <- names(npIndex)
|
56816837 |
|
a11efc9c |
cnAB = list(A=A, B=B, zero=zero, sns=sns, gns=names(npIndex), cdfName=cdfName)
|
56816837 |
t0 <- proc.time()
save(cnAB, file=cnFile)
|
a11efc9c |
t0 <- proc.time()-t0
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", cnFile, ".")
rm(cnAB, B, zero)
}
|
56816837 |
|
e609f78b |
# next process snp probes
snpIndex = getVarInEnv("snpProbesFid")
nprobes <- length(snpIndex)
|
56816837 |
|
e609f78b |
##We will read each cel file, summarize, and run EM one by one
##We will save parameters of EM to use later
|
a11efc9c |
mixtureParams <- initializeBigMatrix("crlmmMixt-", 4, narrays, "double")
SNR <- initializeBigVector("crlmmSNR-", narrays, "double")
|
56816837 |
SKW <- initializeBigVector("crlmmSKW-", narrays, "double")
|
a11efc9c |
# mixtureParams <- matrix(0, 4, narrays)
# SNR <- vector("numeric", narrays)
# SKW <- vector("numeric", narrays)
|
e609f78b |
## This is the sample for the fitting of splines
## BC: I like better the idea of the user passing the seed,
## because this might intefere with other analyses
## (like what happened to GCRMA)
set.seed(seed)
idx <- sort(sample(autosomeIndex, mixtureSampleSize))
|
a11efc9c |
idx2 <- sample(nprobes, 10^5)
|
56816837 |
|
e609f78b |
##S will hold (A+B)/2 and M will hold A-B
##NOTE: We actually dont need to save S. Only for pics etc...
##f is the correction. we save to avoid recomputing
|
a11efc9c |
# A <- exprs(channel(XY, "X"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsa), length(filenames))
# B <- exprs(channel(XY, "Y"))[,] # ), nrow(XY), narrays) # [snpIndex,]), nprobes, narrays) # matrix(as.integer(0), length(pnsb), length(filenames))
|
e609f78b |
# new lines below - useful to keep track of zeroed out SNPs
|
a11efc9c |
# zero <- exprs(channel(XY, "zero"))[,] # )) #[snpIndex,]), nprobes, narrays)
|
e609f78b |
# if(!is.matrix(A)) {
# A = A[,]
# B = B[,]
# zero = zero[,]
# }
|
a11efc9c |
# if(!is.integer(A)) {
# A = matrix(as.integer(A), nrow(A), ncol(A))
# B = matrix(as.integer(B), nrow(B), ncol(B))
|
56816837 |
# }
|
e609f78b |
# colnames(A) <- colnames(B) <- colnames(zero) <- sns
# rownames(A) <- rownames(B) <- rownames(zero) <- names(snpIndex) # gns # featureNames(XY)
|
a11efc9c |
A <- initializeBigMatrix("crlmmA-", nprobes, narrays, "integer")
B <- initializeBigMatrix("crlmmB-", nprobes, narrays, "integer")
zero <- initializeBigMatrix("crlmmZero-", nprobes, narrays, "integer")
|
56816837 |
|
e609f78b |
if(verbose){
message("Calibrating ", narrays, " arrays.")
if (getRversion() > '2.7.0') pb <- txtProgressBar(min=0, max=narrays, style=3)
}
for(i in 1:narrays){
|
a11efc9c |
A[,i] = exprs(channel(XY, "X"))[snpIndex,i]
B[,i] = exprs(channel(XY, "Y"))[snpIndex,i]
|
56816837 |
zero[,i] = exprs(channel(XY, "zero"))[snpIndex,i]
|
a11efc9c |
# SKW[i] = mean((A[snpIndex,i][idx2]-mean(A[snpIndex,i][idx2]))^3)/(sd(A[snpIndex,i][idx2])^3)
SKW[i] = mean((A[idx2,i]-mean(A[idx2,i]))^3)/(sd(A[idx2,i])^3)
|
e609f78b |
if(fitMixture){
|
a11efc9c |
S <- (log2(A[idx,i])+log2(B[idx,i]))/2 - SMEDIAN
M <- log2(A[idx,i])-log2(B[idx,i])
|
e609f78b |
##we need to test the choice of eps.. it is not the max diff between funcs
tmp <- fitAffySnpMixture56(S, M, theKnots, eps=eps)
mixtureParams[, i] <- tmp[["coef"]]
SNR[i] <- tmp[["medF1"]]^2/(tmp[["sigma1"]]^2+tmp[["sigma2"]]^2)
}
if(verbose) {
if (getRversion() > '2.7.0') setTxtProgressBar(pb, i)
else cat(".")
}
}
if (verbose) {
if (getRversion() > '2.7.0') close(pb)
else cat("\n")
}
if (!fitMixture) SNR <- mixtureParams <- NA
## gns comes from preprocStuff.rda
|
a11efc9c |
res = list(A=A, B=B,
zero=zero, sns=sns, gns=names(snpIndex), SNR=SNR, SKW=SKW,
mixtureParams=mixtureParams, cdfName=cdfName) # , snpIndex=snpIndex, npIndex=npIndex)
|
e609f78b |
|
a11efc9c |
if(save.it & !missing(snpFile)) {
|
56816837 |
t0 <- proc.time()
save(res, file=snpFile)
|
a11efc9c |
t0 <- proc.time()-t0
if(verbose) message("Used ", round(t0[3],1), " seconds to save ", snpFile, ".")
}
close(A)
close(B)
close(zero)
close(SKW)
close(mixtureParams)
close(SNR)
|
e609f78b |
return(res)
}
|
5fcfed7d |
crlmmIllumina <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
row.names=TRUE, col.names=TRUE,
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
cdfName, sns, recallMin=10, recallRegMin=1000,
returnParams=FALSE, badSNP=.7) {
if (save.it & (missing(snpFile) | missing(cnFile)))
stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
if (load.it & missing(snpFile))
stop("'snpFile' is missing and you chose to load.it")
if (!missing(snpFile))
if (load.it & !file.exists(snpFile)){
load.it <- FALSE
message("File ", snpFile, " does not exist.")
stop("Cannot load SNP data.")
}
if (!load.it){
if(!missing(RG)) {
if(missing(XY))
XY = RGtoXY(RG, chipType=cdfName)
else
stop("Both RG and XY specified - please use one or the other")
}
if (missing(sns)) sns <- sampleNames(XY) #$X
|
56816837 |
|
5fcfed7d |
res = preprocessInfinium2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget,
save.it=save.it, snpFile=snpFile, cnFile=cnFile)
}else{
if(verbose) message("Loading ", snpFile, ".")
obj <- load(snpFile)
if(verbose) message("Done.")
if(!any(obj == "res"))
stop("Object in ", snpFile, " seems to be invalid.")
}
|
a11efc9c |
if(row.names) row.names=res[["gns"]] else row.names=NULL
if(col.names) col.names=res[["sns"]] else col.names=NULL
|
5fcfed7d |
res2 <- crlmmGT(res[["A"]], res[["B"]], res[["SNR"]],
res[["mixtureParams"]], res[["cdfName"]],
gender=gender, row.names=row.names,
col.names=col.names, recallMin=recallMin,
recallRegMin=1000, SNRMin=SNRMin,
returnParams=returnParams, badSNP=badSNP,
verbose=verbose)
|
56816837 |
|
5fcfed7d |
res2[["SNR"]] <- res[["SNR"]]
res2[["SKW"]] <- res[["SKW"]]
rm(res)
gc()
return(list2SnpSet(res2, returnParams=returnParams)) # return(res2)
}
|
e609f78b |
crlmmIllumina2 <- function(RG, XY, stripNorm=TRUE, useTarget=TRUE,
row.names=TRUE, col.names=TRUE,
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
a11efc9c |
seed=1, save.it=FALSE, load.it=FALSE, snpFile, cnFile,
|
e609f78b |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
cdfName, sns, recallMin=10, recallRegMin=1000,
returnParams=FALSE, badSNP=.7) {
|
a11efc9c |
if (save.it & (missing(snpFile) | missing(cnFile)))
stop("'snpFile' and/or 'cnFile' is missing and you chose to save.it")
if (load.it & missing(snpFile))
stop("'snpFile' is missing and you chose to load.it")
if (!missing(snpFile))
if (load.it & !file.exists(snpFile)){
load.it <- FALSE
message("File ", snpFile, " does not exist.")
stop("Cannot load SNP data.")
}
if (!load.it){
|
e609f78b |
if(!missing(RG)) {
if(missing(XY))
XY = RGtoXY2(RG, chipType=cdfName)
else
stop("Both RG and XY specified - please use one or the other")
}
if (missing(sns)) sns <- sampleNames(XY) #$X
|
56816837 |
|
e609f78b |
res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget) #,
# save.it=save.it, snpFile=snpFile, cnFile=cnFile)
|
a11efc9c |
# fD = featureData(XY)
# phenD = XY@phenoData
# protD = XY@protocolData
# rm(XY)
# gc()
# if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
# callSet <- new("SnpSuperSet",
# alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
# alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
# call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
# callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
|
56816837 |
# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
|
a11efc9c |
# sampleNames(callSet) <- sns
# featureNames(callSet) <- res[["gns"]]
# pData(callSet)$SKW <- rep(NA, length(sns))
# pData(callSet)$SNR <- rep(NA, length(sns))
# pData(callSet)$gender <- rep(NA, length(sns))
|
56816837 |
|
a11efc9c |
}else{
if(verbose) message("Loading ", snpFile, ".")
obj <- load(snpFile)
if(verbose) message("Done.")
if(!any(obj == "res"))
stop("Object in ", snpFile, " seems to be invalid.")
}
|
e609f78b |
|
a11efc9c |
# rm(phenD, protD , fD)
|
56816837 |
# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
a11efc9c |
# suppressWarnings(A(callSet) <- res[["A"]])
# suppressWarnings(B(callSet) <- res[["B"]])
# pData(callSet)$SKW <- res$SKW
# pData(callSet)$SNR <- res$SNR
# mixtureParams <- res$mixtureParams
# rm(res); gc()
if(row.names) row.names=res$gns else row.names=NULL
if(col.names) col.names=res$sns else col.names=NULL
|
56816837 |
|
a11efc9c |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]),
SNR=res[["SNR"]], # callSet$SNR, # [j],
mixtureParams=res[["mixtureParams"]], #,
cdfName=res[["cdfName"]], # annotation(callSet),
row.names=row.names, # featureNames(callSet)[snp.index],
col.names=col.names, # sampleNames(callSet), #[j],
|
e609f78b |
probs=probs,
DF=DF,
SNRMin=SNRMin,
recallMin=recallMin,
recallRegMin=recallRegMin,
gender=gender,
verbose=verbose,
returnParams=returnParams,
badSNP=badSNP)
|
56816837 |
# rm(res); gc()
|
e609f78b |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
# callSet$gender[j] <- tmp$gender
|
a11efc9c |
# suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
# callSet$gender <- tmp$gender
# rm(tmp); gc()
# return(callSet)
res2[["SNR"]] <- res[["SNR"]]
res2[["SKW"]] <- res[["SKW"]]
rm(res); gc()
return(list2SnpSet(res2, returnParams=returnParams))
|
e609f78b |
}
|
56816837 |
## MR: Below is a more memory efficient version of crlmmIllumina() which
## reads in the .idats and genotypes in the one function and removes objects
|
4e4601af |
## after they have been used
|
5fcfed7d |
crlmmIlluminaV2 = function(sampleSheet=NULL,
|
e609f78b |
arrayNames=NULL,
|
5fcfed7d |
ids=NULL,
path=".",
arrayInfoColNames=list(barcode="SentrixBarcode_A", position="SentrixPosition_A"),
highDensity=FALSE,
sep="_",
fileExt=list(green="Grn.idat", red="Red.idat"),
saveDate=FALSE,
|
e609f78b |
# save.rg=FALSE,
# rgFile,
|
5fcfed7d |
stripNorm=TRUE,
useTarget=TRUE,
|
56816837 |
row.names=TRUE,
|
5fcfed7d |
col.names=TRUE,
probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
|
a11efc9c |
seed=1, save.ab=FALSE, snpFile, cnFile,
|
e609f78b |
mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
cdfName, sns, recallMin=10, recallRegMin=1000,
returnParams=FALSE, badSNP=.7) {
if(missing(cdfName)) stop("must specify cdfName")
if(!isValidCdfName(cdfName)) stop("cdfName not valid. see validCdfNames")
# if(missing(sns)) sns <- basename(arrayNames)
|
56816837 |
|
e609f78b |
# if (save.rg & missing(rgFile))
# stop("'rgFile' is missing, and you chose save.rg")
|
a11efc9c |
if (save.ab & (missing(snpFile) | missing(cnFile)))
stop("'snpFile' or 'cnFile' is missing and you chose save.ab")
|
e609f78b |
# batches = NULL
# if(!is.null(arrayNames))
# batches <- rep(1, length(arrayNames)) # problem here if arrayNames not specified! # splitIndicesByLength(seq(along=arrayNames), ocSamples())
# if(!is.null(sampleSheet))
# batches <- rep(1, nrow(sampleSheet))
# if(is.null(batches))
# batches=1
# k <- 1
# for(j in batches){
# if(verbose) message("Batch ", k, " of ", length(batches))
# RG = readIdatFiles(sampleSheet=sampleSheet[j,], arrayNames=arrayNames[j],
# ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
# highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
RG = readIdatFiles2(sampleSheet=sampleSheet, arrayNames=arrayNames,
|
4e4601af |
ids=ids, path=path, arrayInfoColNames=arrayInfoColNames,
highDensity=highDensity, sep=sep, fileExt=fileExt, saveDate=saveDate)
|
e609f78b |
# if(save.rg)
# save(RG, file=rgFile)
|
4e4601af |
|
e609f78b |
XY = RGtoXY2(RG, chipType=cdfName)
|
a11efc9c |
rm(RG); gc()
|
e609f78b |
if (missing(sns)) { sns = sampleNames(XY) #subsns = sampleNames(XY)
|
a11efc9c |
} # else subsns = sns[j]
|
e609f78b |
res = preprocessInfinium2v2(XY, mixtureSampleSize=mixtureSampleSize, fitMixture=TRUE, verbose=verbose,
|
a11efc9c |
seed=seed, eps=eps, cdfName=cdfName, sns=sns, stripNorm=stripNorm, useTarget=useTarget, #) # sns=subsns
save.it=save.ab, snpFile=snpFile, cnFile=cnFile)
# fD = featureData(XY)
# phenD = XY@phenoData
# protD = XY@protocolData
rm(XY); gc()
|
e609f78b |
# if(k == 1){
|
a11efc9c |
# if(verbose) message("Initializing container for alleleA, alleleB, call, callProbability")
# callSet <- new("SnpSuperSet",
# alleleA=initializeBigMatrix(name="A", nr=nrow(res[[1]]), nc=length(sns)),
# alleleB=initializeBigMatrix(name="B", nr=nrow(res[[1]]), nc=length(sns)),
# call=initializeBigMatrix(name="call", nr=nrow(res[[1]]), nc=length(sns)),
# callProbability=initializeBigMatrix(name="callPr", nr=nrow(res[[1]]), nc=length(sns)),
# annotation=cdfName, protocolData=protD, phenoData=phenD, featureData=fD)
# sampleNames(callSet) <- sns
|
e609f78b |
# phenoData(callSet) <- getPhenoData(sampleSheet=sampleSheet,
# arrayNames=sns,
# arrayInfoColNames=arrayInfoColNames)
# pD <- data.frame(matrix(NA, length(sns), 1), row.names=sns)
# colnames(pD) <- "ScanDate"
# protocolData(callSet) <- pData(protD) # new("AnnotatedDataFrame", data=pD)
# pData(protocolData(callSet))[j, ] <- pData(protocolData)
|
a11efc9c |
# featureNames(callSet) <- res[["gns"]]
# pData(callSet)$SKW <- rep(NA, length(sns))
# pData(callSet)$SNR <- rep(NA, length(sns))
|
56816837 |
# pData(callSet)$gender <- rep(NA, length(sns))
|
e609f78b |
# }
# pData(callSet)[j,] <- phenD
# pData(protocolData(callSet))[j,] <- protD
# pData(callSet) <- phenD
# pData(protocolData(callSet)) <- protD
|
a11efc9c |
# rm(phenD, protD, fD)
|
56816837 |
|
e609f78b |
# if(k > 1 & nrow(res[[1]]) != nrow(callSet)){
##RS: I don't understand why the IDATS for the
##same platform potentially have different lengths
# res[["A"]] <- res[["A"]][res$gns %in% featureNames(callSet), ]
# res[["B"]] <- res[["B"]][res$gns %in% featureNames(callSet), ]
# }
|
56816837 |
# snp.index <- res$snpIndex #match(res$gns, featureNames(callSet))
|
e609f78b |
# suppressWarnings(A(callSet)[, j] <- res[["A"]])
# suppressWarnings(B(callSet)[, j] <- res[["B"]])
|
a11efc9c |
# suppressWarnings(A(callSet) <- res[["A"]])
# suppressWarnings(B(callSet) <- res[["B"]])
|
e609f78b |
# pData(callSet)$SKW[j] <- res$SKW
# pData(callSet)$SNR[j] <- res$SNR
|
a11efc9c |
# pData(callSet)$SKW <- res$SKW
# pData(callSet)$SNR <- res$SNR
# mixtureParams <- res$mixtureParams
# rm(res); gc()
if(row.names) row.names=res$gns else row.names=NULL
|
56816837 |
if(col.names) col.names=res$sns else col.names=NULL
|
a11efc9c |
res2 <- crlmmGT2(A=res[["A"]], #as.matrix(A(callSet)[snp.index,]), # j]),
B=res[["B"]], # as.matrix(B(callSet)[snp.index,]), # j]),
SNR=res[["SNR"]], # callSet$SNR, # [j],
mixtureParams=res[["mixtureParams"]], #,
cdfName=res[["cdfName"]], # annotation(callSet),
row.names=row.names, # featureNames(callSet)[snp.index],
col.names=col.names, # sampleNames(callSet), #[j],
|
e609f78b |
probs=probs,
DF=DF,
SNRMin=SNRMin,
recallMin=recallMin,
recallRegMin=recallRegMin,
gender=gender,
verbose=verbose,
returnParams=returnParams,
badSNP=badSNP)
|
56816837 |
# rm(res); gc()
|
a11efc9c |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
# callSet$gender[j] <- tmp$gender
# suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
# callSet$gender <- tmp$gender
# rm(tmp); gc()
# return(callSet)
res2[["SNR"]] <- res[["SNR"]]
res2[["SKW"]] <- res[["SKW"]]
rm(res); gc()
return(list2SnpSet(res2, returnParams=returnParams))
# tmp <- crlmmGT(A=as.matrix(A(callSet)[snp.index,]), # j]),
# B=as.matrix(B(callSet)[snp.index,]), # j]),
# SNR=callSet$SNR, # [j],
# mixtureParams=mixtureParams,
# cdfName=annotation(callSet),
# row.names=featureNames(callSet)[snp.index],
# col.names=sampleNames(callSet), #[j],
# probs=probs,
# DF=DF,
# SNRMin=SNRMin,
# recallMin=recallMin,
# recallRegMin=recallRegMin,
# gender=gender,
# verbose=verbose,
# returnParams=returnParams,
# badSNP=badSNP)
|
e609f78b |
# suppressWarnings(snpCall(callSet)[snp.index, j] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index, j] <- tmp[["confs"]])
# callSet$gender[j] <- tmp$gender
|
a11efc9c |
# suppressWarnings(snpCall(callSet)[snp.index,] <- tmp[["calls"]])
# suppressWarnings(snpCallProbability(callSet)[snp.index,] <- tmp[["confs"]])
# callSet$gender <- tmp$gender
# rm(tmp); gc()
|
e609f78b |
# k <- k+1
# }
|
a11efc9c |
# return(callSet)
|
4e4601af |
}
|