R/RNASEQ_Analysis_solo_SVMerge_SE.r
29405c80
 #' Combining the RNAseq reads of family members in a 
 #' single file.
 #'
 #' @param RNASeqDir  character. Directory containing RNAseq reads.
 #' @param returnMethod  character. Method of returning Data.
 #' @param outpath  character. Contains file path if Method of return is chosen as 
 #' Text.
 #' @param outFileName  character. Output file name. 
 #' @return Text or Dataframe containing TPM read counts of genes in the family.
 #' @examples
 #' RNASeqDir = system.file("extdata", package="nanotatoR")
 #' returnMethod="dataFrame"
 #' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
 #' returnMethod = returnMethod)
 #' @importFrom AnnotationDbi mapIds
 #' @importFrom stats na.omit 
 #' @import org.Hs.eg.db
 #' @export
 RNAseqcombine_solo<-function(RNASeqDir,returnMethod=c("Text","dataFrame"),
                         outpath="",outFileName=""){ 
     #library()
     #setwd(RNASeqDir)
     l <- list.files(path = RNASeqDir, 
        pattern="*.genes.results", full.names = TRUE)
     len<-length(l)
     #dat<-listDatasets(ensembl)
     #g1<-grep("sscrofa",listDatasets(ensembl)$dataset)
     'grch37 = useMart(="ENSEMBL_MART_ENSEMBL", host="www.ensembl.org", 
                     dataset="hsapiens_gene_ensembl")'
     gen<-c(); 
     ##Need to make this function dynamic
     dat<-data.frame(matrix(ncol=len,nrow=nrow(r<-read.table(l[1],sep="\t",header=TRUE))))
     cnam<-c()
     for (ii in 1:length(l)){
         r<-read.table(l[ii],sep="\t",header=TRUE)
         gen<-c(gen,as.character(r$gene_id))
         dat[,ii]<-as.numeric(r$TPM)
         str<-strsplit(l[ii],split=".genes.results")
         #print(str[[1]][1])
         cnam<-c(cnam,str[[1]][1])
     }
     #datf<-data.frame(dat)
     gen<-unique(as.character(gen))
     st<-strsplit(gen,split="[.]")
     genes<-c()
     for(k in 1:length(gen)){
         genes<-c(genes,as.character(st[[k]][1]))
     }
     data1 <- data.frame(dat[,1])
     names(data1) <- cnam
     genesym <- c()
     ensemblid <- c()
     'gene1 = getBM(attributes = c("external_gene_name", "ensembl_gene_id"), 
                 filters = "ensembl_gene_id", values = genes, mart = grch37)'
     gn1 <- mapIds(org.Hs.eg.db, genes, "SYMBOL", "ENSEMBL")
     'rn <- row.names(data.frame(gn1))
     rn1 <- row.names(data.frame(gn2))'
     gene1<-data.frame(
         ensembl_gene_id = as.character(names((gn1))),
         external_gene_name = as.character(data.frame(gn1)[,1])
         )
     genesym<-as.character(gene1$external_gene_name)
     ensemblid<-as.character(gene1$ensembl_gene_id)
     gene3<-c()
     ens<-c()
     ensemblid<-paste("^",ensemblid,"$",sep="")
     for(kk in 1:length(genes)){
         pag<-paste("^",genes[kk],"$",sep="")
         val<-grep(pag,ensemblid,fixed=TRUE)
         if(length(val)>0){
             gene3<-c(gene3,as.character(unique(genesym[val])))
             ens<-c(ens,as.character(genes[kk]))
         }
         else{
             gene3<-c(gene3,as.character("-"))
             ens<-c(ens,as.character(genes[kk]))
         }
     }
     RNASeqDat<-data.frame(GeneName = as.character(gene3),
             GeneID = as.character(ens),data1)
     if(returnMethod=="Text"){
         fname = file.path(outFileName,".csv",sep = "")
         write.csv(RNASeqDat,file.path(outpath,fname), row.names = FALSE )
     }
     else if (returnMethod=="dataFrame"){
         return (RNASeqDat)
     }
     else{
         stop("Invalid ReturnMethod")
     }
 }
 #' Annotating the Overlapping genes with RNAseq expression
 #'
 #' @param gnsOverlap  character. Vector containing overlapping genes.
 #' @param SVID  character. SV Index ID.
 #' @param RNASeqData  dataFrame. RNAseq data with gene names. 
 #' @param pattern_Proband  character. Pattern for proband. 
 #' @return Dataframe containing TPM read counts of overlapping genes.
 #' @examples
 #' RNASeqDir = system.file("extdata", package="nanotatoR")
 #' returnMethod="dataFrame"
 #' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
 #' returnMethod = returnMethod)
 #' gnsOverlap <- c("AGL")
 #' SVID = 397
 #' datgnovrlap <- OverlapRNAseq_solo(gnsOverlap = gnsOverlap, 
 #' SVID = SVID, RNASeqData = datRNASeq,
 #' pattern_Proband = "*_P_*")
 #' @importFrom stats na.omit  
 #' @export
 OverlapRNAseq_solo<-function(gnsOverlap, SVID, RNASeqData,
                 pattern_Proband = NA){
   
   ###Finding the column names
    #print(gnsOverlap);print(length(gnsOverlap))
   'if(is.na(pattern_Father)==FALSE){
     fatherInd<-grep(pattern_Father,names(RNASeqData))
     } else{fatherInd<- NA}
     if(is.na(pattern_Mother)==FALSE){
     motherInd<-grep(pattern_Mother,names(RNASeqData))
     } else{motherInd<- NA}'
     if(is.na(pattern_Proband)==FALSE){
         probandInd <- grep(pattern_Proband,names(RNASeqData))
     } else{probandInd<- NA}
     'if(is.na(pattern_Sibling)==FALSE){
       siblingInd<-grep(pattern_Sibling,names(RNASeqData))
     } 
     else{
       siblingInd<-NA
     }'
     sv<-c()
     gene<-c()
     gnsname<-as.character(RNASeqData$GeneName)
     pasgnsname<-pasgnovlap<-paste("^",gnsname,"$",sep="")
     'overlap_ensemblgenes = select(EnsDb.Hsapiens.v79, gnsOverlap, 
                 c("GENEID","GENENAME"), "SYMBOL")
     gnsOverlapID<-as.character(overlap_ensemblgenes$GENEID)'
     #print(paste("gnsOverlap:",gnsOverlap))
     #print(paste("overlap_ensemblgenes:",overlap_ensemblgenes))
     #print(paste("gnsOverlapID:",gnsOverlapID))
     #genes<-as.character(overlap_ensemblgenes$SYMBOL)
     ###Extracting Reads
     ###
     ###Genes Names Extraction
     #print(paste("fatherInd :",fatherInd))
     #print(paste("motherInd :",motherInd))
     #print(paste("probandInd :",probandInd))
     #print(paste("siblingInd :",siblingInd))
     gnsOverlapID <- as.character(gnsOverlap)
     #print(gnsOverlapID)
     if(length(gnsOverlapID)>1){
     
         datGeneInfoTemp<-data.frame()
         fatherReads<-c()
         motherReads<-c()
         probandReads<-c()
         siblingReads<-c()
         for (ki in 1:length(gnsOverlapID)){
             pasgnovlap <- paste("^", gnsOverlapID[ki],"$", sep = "")
             #print(ki)
             gg<-grep(pasgnovlap, pasgnsname, fixed = TRUE)
             dat_temp<-RNASeqData[gg, ]
         
             if(nrow(dat_temp)>1){
                 #print("FALSE")
                 #print(ki)
                 dat_temp_1 <- mean(dat_temp[,probandInd])
                 #dat_temp_1<-data.frame(dat_temp_1)
                 dat_temp1 <- dat_temp[1,]
                 dat_temp1[,probandInd] <- dat_temp_1
                 #print(dim(dat_temp1))
             
                 if(is.na(probandInd[1])==FALSE){
                     if(length(probandInd)>1){
                         for(j in probandInd){
                             probandcount<-c(probandcount,dat_temp1[,j])
                         }
                         probandReads<-c(
                             probandReads,paste(probandcount,collapse = ":"))
                     }
                     else if(length(probandInd)==1){
                         probandReads<-c(probandReads,dat_temp1[,probandInd])
                     }
                     else{
                         probandReads<-c(probandReads,0)
                     }
                 }
                 else{
                     probandReads<-c(probandReads,"-")
                 }
             }
             else if (nrow(dat_temp)==1) {
                 #print("TRUE")
                 #print(dim(dat_temp1))
                 probandcount<-c()
                 if(is.na(probandInd [1])==FALSE){ 
                     if(length(probandInd)>1){
                         for(j in probandInd){
                             probandcount<-c(probandcount,dat_temp[,j])
                         }
                         probandReads<-c(
                         probandReads, paste(probandcount,collapse = ":"))
                     }
                     else if(length(probandInd)==1){
                         probandReads<-c(
                             probandReads, mean(dat_temp[,probandInd]))
                     }
                     else{
                         probandReads<-c(probandReads,0)
                     }
                 }
                 else{
                     probandReads<-c(probandReads, "-")
                 }
             }
             else{
                 probandReads<-c(probandReads,"-")
             }   
             gene<-c(gene,as.character(gnsOverlapID[ki]))
         }
         if(is.na(probandInd[1])==FALSE){
             ProbandGenes<-c()
             for(ii in 1:length(gene)){
                 pasgene<-paste(gene[ii],"(", probandReads[ii],")", sep = "")
                 ProbandGenes<-c(ProbandGenes,pasgene)
             }
             ProbandTPM<-paste(ProbandGenes,collapse=";")
             } else{
                 ProbandTPM <- "-"
             }
         datGeneInfo<-data.frame(SVID=SVID, ProbandTPM=ProbandTPM)     
                 
     }
     else if(length(gnsOverlapID)==1){
         pasgnovlap<-paste("^",as.character(gnsOverlapID),"$",sep="")
         #print(ki)
         gg<-grep(pasgnovlap,pasgnsname,fixed=TRUE)
       
         dat_temp<-RNASeqData[gg,]
         if(nrow(dat_temp)>1){
             dat_temp_1 <- mean(dat_temp[,probandInd])
             #dat_temp_1<-data.frame(dat_temp_1)
             dat_temp1<-dat_temp[1,]
             dat_temp1[,probandInd]<-dat_temp_1
             #dat_temp1<-cbind(dat_temp[1,1:3],dat_temp1)
             #print(dim(dat_temp1))
             probandcount<-c()
             if(is.na(probandInd[1])==FALSE){ 
                 if(length(probandInd)>1){
                     for(j in probandInd){
                         probandcount <- c(probandcount,mean(dat_temp1[,j]))
                     }
                 probandReads <- paste(probandcount,collapse = ":")
                 }
                 else if(length(probandInd)==1){
                     probandReads<-mean(dat_temp1[,probandInd])
                 }
                 else{
                     probandReads <- 0
                 }
             } 
             else{
                 probandReads <- "-"
             }
           
           
         #motherReads<-dat_temp1[,motherInd]
         #probandReads<-dat_temp1[,probandInd]
         #if(is.na(siblingInd[1])==TRUE){
         #siblingReads<-"-"
         #}
         #else{
         #siblingReads<-dat_temp1[,siblingInd]
         #}
         }
         else if (nrow(dat_temp)==1){
        
             #print(dim(dat_temp1))
             if(is.na(probandInd[1])==FALSE){ 
                 if(length(probandInd)>1){
                     for(j in probandInd){
                         probandcount <- c(probandcount,mean(dat_temp[,j]))
                     }
                     probandReads<-paste(probandcount,collapse=":")
                 }
                 else if(length(probandInd)==1){
                     probandReads<-mean(dat_temp[,probandInd])
                 }
                 else{
                     probandReads<-0
                 }
             } else{
                 probandReads<-"-"
             }
         }
         else{
             probandReads<-"-"
         }
         #gene<-overlap_ensemblgenes$SYMBOL
         if(is.na(probandInd[1])==FALSE){
             gene<- as.character(gnsOverlapID)
         
             ProbandGenes<-paste(gene,"(",probandReads,")",sep="")
             #ProbandGenes<-c(ProbandGenes,pasgene)
             ProbandTPM<-as.character(ProbandGenes)
         }else{
             ProbandTPM <- "-"
         }
         datGeneInfo<-data.frame(SVID = SVID,ProbandTPM = ProbandTPM)
             
     }
     else{
         datGeneInfo <- data.frame(SVID = SVID,ProbandTPM = "-")
     }
   
   #print(warnings())
   
     return(datGeneInfo)
 }
 #' Annotating the Non-Overlapping genes with RNAseq expression
 #'
 #' @param gnsNonOverlap  character. Vector containing non-overlapping genes.
 #' @param SVID  character. SV Index ID.
 #' @param RNASeqData  dataFrame. RNAseq data with gene names. 
 #' @param pattern_Proband  character. Pattern for proband. 
 #' @return Dataframe containing TPM read counts of overlapping genes.
 #' @examples
 #' RNASeqDir = system.file("extdata", package="nanotatoR")
 #' returnMethod="dataFrame"
 #' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
 #' returnMethod = returnMethod)
 #' gnsNonOverlap <- c("DDX11L1", "MIR1302-2HG", "OR4G4P")
 #' SVID = 397
 #' datgnnonovrlap <- nonOverlapRNAseq_solo(gnsNonOverlap = gnsNonOverlap, 
 #' SVID = SVID, RNASeqData = datRNASeq,
 #' pattern_Proband = "*_P_*")
 #' @importFrom stats na.omit  
 #' @export
 
 nonOverlapRNAseq_solo<-function(gnsNonOverlap,SVID,RNASeqData,
                 pattern_Proband = NA){
   ## annotation
   ###Checking if the input is empty; else if not empty add 
   ###expression values for each genes
     datGeneInfo<-data.frame()
     SVID=SVID
     ###Extracting the index for the the parents
     'if(is.na(pattern_Father)==FALSE){
     fatherInd<-grep(pattern_Father,names(RNASeqData))
     } else{fatherInd <- NA}
     if(is.na(pattern_Mother)==FALSE){
     motherInd<-grep(pattern_Mother,names(RNASeqData))
     } else{motherInd <- NA}'
     if(is.na(pattern_Proband)==FALSE){
          probandInd<-grep(pattern_Proband,names(RNASeqData))
     } else{probandInd <- NA}
     ##Checking for sibling
     'if(is.na(pattern_Sibling)==FALSE){
       siblingInd<-grep(pattern_Sibling,names(RNASeqData))
     } 
     else{
       siblingInd<-NA
     }'
     
     gene<-c()
     gnsname<-as.character(RNASeqData$GeneName)
     pasgnsname<-pasgnovlap<-paste("^",as.character(gnsname),"$",sep="")
     'nonoverlap_ensemblgenes = select(EnsDb.Hsapiens.v79, gnsNonOverlap, 
                 c("GENEID","GENENAME"), "SYMBOL")
     gnsnonOverlapID<-as.character(nonoverlap_ensemblgenes$GENEID)'    
     ###Extracting Reads
     ###
     ###Genes Names Extraction
     gnsnonOverlapID<- as.character(gnsNonOverlap)
     if(length(gnsnonOverlapID)>1){
         #datGeneInfoTemp<-data.frame()
         probandReads<-c()
       
       
         for (ki in 1:length(gnsnonOverlapID)){
             
             pasgnnonovlap <- paste("^",as.character(
                 gnsnonOverlapID[ki]),"$",sep = "")
             gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
             dat_temp<-RNASeqData[gg,]
             if(nrow(dat_temp)>1){
                 dat_temp_1 <- mean(dat_temp[,probandInd])
                 #dat_temp_1<-data.frame(dat_temp_1)
                 dat_temp1<-dat_temp[1,]
                 dat_temp1[,probandInd]<-dat_temp_1
                 #print(dim(dat_temp1))
                 probandcount<-c()
                 if(is.na(probandInd[1]) == FALSE){
                     if(length(probandInd)>1){
                         for(j in probandInd){
                             probandcount <- c(probandcount,dat_temp1[,j])
                         }
                     probandReads<-c(
                         probandReads,paste(probandcount,collapse=":")
                         )
                     }
                     else if(length(probandInd)==1){
                         probandReads<-c(probandReads,dat_temp1[,probandInd])
                     }
                     else{
                         probandReads<-c(probandReads,0)
                     }
                 } else{
                     probandReads<-c(probandReads, "-")
                 }
             }
             else if (nrow(dat_temp)==1) {
                 #print(dim(dat_temp1))
                 probandcount<-c()
                 if(is.na(probandInd[1]) == FALSE){ 
                     if(length(probandInd)>1){
                         for(j in probandInd){
                             probandcount <- c(probandcount, dat_temp[,j])
                         }
                     probandReads<-c(probandReads,paste(probandcount,collapse=":"))
                 }
                 else if(length(probandInd)==1){
                     probandReads<-c(probandReads,dat_temp[,probandInd])
                 }
                 else{
                     probandReads<-c(probandReads,0)
                 }
             }
             else{
                 probandReads <- c(probandReads,"-")
             }
         }
         else{
             probandReads<-c(probandReads,"-")
         }
         gene<-c(gene,as.character(gnsnonOverlapID[ki]))
     }
         if(is.na(probandInd[1])==FALSE){
             ProbandGenes<-c()
         
             for(ii in 1:length(gene)){
                 pasgene<-paste(gene[ii],"(",probandReads[ii],")",sep="")
                 ProbandGenes<-c(ProbandGenes,pasgene)
             }
             ProbandTPM<-paste(ProbandGenes,collapse=";")
             } else{
                 ProbandTPM<-"-"
             }
         datGeneInfo<-data.frame(SVID=SVID,ProbandTPM=ProbandTPM)
     } 
     else if(length(gnsnonOverlapID)==1){
         pasgnnonovlap<-paste("^",as.character(gnsnonOverlapID),"$",sep="")
         gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
         #gg<-grep(pasgnnonovlap,gnsname,fixed=TRUE)
         dat_temp<-RNASeqData[gg,]
       
         if(nrow(dat_temp)>1){
             dat_temp_1 <- mean(dat_temp[,probandInd])
             #dat_temp_1<-data.frame(dat_temp_1)
             dat_temp1<-dat_temp[1,]
             dat_temp1[, probandInd]<-dat_temp_1
            #print(dim(dat_temp1))
             probandcount<-c()
             if(is.na(probandInd[1])==FALSE){
                 if(length(probandInd)>1){
                     for(j in probandInd){
                         probandcount<-c(probandcount,dat_temp1[,j])
                     }
                 probandReads<-paste(probandcount,collapse=":")
             }
             else if(length(probandInd)==1){
                 probandReads<-dat_temp1[,probandInd]
             }
             else{
                 probandReads<-0
             }
         }
         else{
             probandReads<- "-"
         }
           
           
         }
         else if (nrow(dat_temp)==1){
        
          #print(dim(dat_temp1))
             probandcount<-c()
             if(is.na(probandInd)==FALSE){
                 if(length(probandInd)>1){
                     for(j in probandInd){
                         probandcount<-c(probandcount,dat_temp[,j])
                     }
                 probandReads <- paste(probandcount,collapse=":")
             }
             else if(length(probandInd)==1){
                 probandReads <- dat_temp[,probandInd]
             }
             else{
                 probandReads<-0
             }
         }
         else{
             probandReads<- "-"
         }
         }
         else{
             probandReads<-"-"
         }
         gene<-c(gene,as.character(gnsnonOverlapID))
         if(is.na(probandInd[1])==FALSE){
             ProbandGenes <- c()
             ProbandGenes <- paste(gene,"(",probandReads,")",sep="")
             #ProbandGenes<-c(ProbandGenes,pasgene)
         #}
             ProbandTPM<-as.character(ProbandGenes)
         }else{
             ProbandTPM<-"-"
         }
         datGeneInfo <- data.frame(SVID = SVID,ProbandTPM = ProbandTPM)
     }
     else{
         datGeneInfo < -data.frame(SVID = SVID,ProbandTPM = "-")
     
     }
     return(datGeneInfo)
 }
 #' Annotating the Overlapping and Non-Overlapping genes with RNAseq expression
 #'
 #' @param input_fmt_SV  character. Input format of the SV data.Options 
 #' "Text" or "DataFrame".
 #' @param smapdata  dataframe. SV data dataframe.
 #' @param smappath  character. smap path.
 #' @param input_fmt_RNASeq  character. Input format of  
 #' the RNASeq data. Options "Text" or "DataFrame"..
 #' @param RNASeqData  dataFrame. RNAseq data with gene names.
 #' @param RNASeqPATH  character. RNAseq dataset path . 
 #' @param outputfmt  character. Output format of  
 #' the result. Options "Text" or "DataFrame"..
 #' @param pattern_Proband  character. Pattern for proband. 
 #' @param EnzymeType  character. Enzyme used. option "SVMerge" or "SE".
 #' @return Dataframe Annotated datafreme with RNASeq data.
 #' @examples
 #' RNASeqDir = system.file("extdata", package="nanotatoR")
 #' returnMethod="dataFrame"
 #' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
 #' returnMethod = returnMethod)
 #' smapName="NA12878_DLE1_VAP_solo5.smap"
 #' smap = system.file("extdata", smapName, package="nanotatoR")
 #' smap = system.file("extdata", smapName, package="nanotatoR")
 #' bedFile <- system.file("extdata", "HomoSapienGRCH19_lift37.bed", package="nanotatoR")
 #' outpath <- system.file("extdata", package="nanotatoR")
 #' datcomp<-overlapnearestgeneSearch(smap = smap, 
 #'     bed=bedFile, inputfmtBed = "bed", outpath, 
 #'     n = 3, returnMethod_bedcomp = c("dataFrame"), 
 #'     input_fmt_SV = "Text",
 #'     EnzymeType = "SE", 
 #'     bperrorindel = 3000, 
 #'     bperrorinvtrans = 50000)
 #' datRNASeq1 <- SVexpression_solo (input_fmt_SV=c("dataFrame"),
 #'     smapdata = datcomp,
 #'     input_fmt_RNASeq=c("dataFrame"),
 #'     RNASeqData = datRNASeq,
 #'     outputfmt=c("datFrame"),
 #'     pattern_Proband = "*_P_*", EnzymeType = c("SE"))
 #' datRNASeq1[1,]
 #' @importFrom stats na.omit  
 #' @export
 SVexpression_solo <- function(input_fmt_SV=c("Text","dataFrame"),
         smapdata,smappath, input_fmt_RNASeq=c("Text","dataFrame"),
         RNASeqData,RNASeqPATH,outputfmt=c("Text","datFrame"),
         pattern_Proband = NA, EnzymeType = c("SVMerge", "SE")){
   ###RNASEQ Analysis data
     if(input_fmt_RNASeq=="dataFrame"){
         RNASeqData = RNASeqData
     }
     else if(input_fmt_RNASeq=="Text"){
         RNASeqData=read.csv(RNASeqPATH)
     }
     else{
         stop("Input format for RNASeq Data Incorrect")
     }
   
     if(input_fmt_SV=="dataFrame"){
         smapdata = smapdata
         if(EnzymeType == "SVMerge"){
             #smapdata <- readSMap(smap, input_fmt_smap = "Text")
             SVID<-smapdata$SVIndex
         }
         else{
             #smapdata <- readSMap_DLE(smap, input_fmt_smap)
             SVID<-smapdata$SmapEntryID
         }
     }
     else if(input_fmt_SV=="Text"){
         if(EnzymeType == "SVMerge"){
             smapdata <- readSMap(smappath, input_fmt_smap = "Text")
             SVID<-smapdata$SVIndex
         }
         else{
             smapdata <- readSMap_DLE(smappath, input_fmt_smap = "Text")
             SVID<-smapdata$SmapEntryID
         }
     }
     else{
         stop("Input format for SMAP Incorrect")
     }
     ##Extracting Data
     overlapgenes <- stringr::str_trim(smapdata$OverlapGenes_strand_perc)
   
     
     dataOverLap <- data.frame(matrix(nrow = nrow(smapdata), ncol = 2))
     ##Extracting Overlapped Genes
     #dataOverLap<-data.frame(matrix(nrow=10,ncol=5))
     names(dataOverLap)<-c("SVID", "OverlapProbandTPM")
     print("###OverlapGenes###")
     for(kk in 1:length(overlapgenes)){
         #print(paste("kk:",kk))
         #for(kk in 1:10){
         #print(kk)
         datOverLap<-data.frame()
         #print(paste("kk:",kk,sep=""))
         svID<-as.character(SVID[kk])
         if(length(grep(";",overlapgenes[kk]))>=1){
             st1<-strsplit(as.character(overlapgenes[kk]), split = ";")
             sttemp<-as.character(st1[[1]])
             #print("1")
             gns_overlap<-c()
             for (tt in 1:length(sttemp)){
                 gn_temp<-strsplit(sttemp[tt], split = "\\(")
                 gns_overlap<-c(gns_overlap,as.character(gn_temp[[1]][1]))
             }
             
                 datOverLap<-OverlapRNAseq_solo(
                         gnsOverlap = as.character(gns_overlap),
                         SVID = svID, 
                         RNASeqData = RNASeqData,
                         pattern_Proband = pattern_Proband)
                 
         }
         else if (length(grep("\\(", as.character(overlapgenes[kk]))) >= 1){
             #print("2")
             gnsOverlap<-strsplit(as.character(overlapgenes[kk]),split="\\(")[[1]][1]
                 datOverLap<-OverlapRNAseq_solo(gnsOverlap = as.character(gnsOverlap),
                     SVID = svID,
                     RNASeqData = RNASeqData,
                     pattern_Proband = pattern_Proband)
             }
             else{
                 #print(paste("OverLapDNSVID:",svID))
                 datOverLap<-data.frame(
                     SVID = svID, ProbandTPM = "-")
                 }
                 dataOverLap[kk,]<-c(
                     as.character(datOverLap$SVID),
                     Proband_OverlapGeneExpression_TPM = as.character(datOverLap$ProbandTPM))
         }
     
             ##Extracting NonOverlapped Genes
             nearestUPGenes<-smapdata$Upstream_nonOverlapGenes_dist_kb
             #datanonOverLapUP<-data.frame(matrix(nrow=nrow(smapdata),ncol=5))
             datanonOverLapUP<-data.frame(matrix(nrow = nrow(smapdata),ncol = 2))
             names(datanonOverLapUP) <- c("SVID", "NonOverlapUPProbandTPM")
             print("###NonOverlapUPStreamGenes###") 
             for(ll in 1:length(nearestUPGenes))
             {
                 #for(ll in 1:10){
                 #print(ll)
                 datNonOverLapUP <- data.frame()
                 #print(paste("llUP:",ll,sep=""))
                 svID<-as.character(SVID[ll])
                 if(length(grep(";",nearestUPGenes[ll]))>=1){
                     st1<-strsplit(
                         as.character(nearestUPGenes[ll]),
                         split = ";")
                         sttemp<-as.character(st1[[1]])
                         #print("1")
                         gns_nonoverlap_up<-c()
                     for (mm in 1:length(sttemp)){
                         gn_temp<-strsplit(sttemp[mm],split="\\(")
                         gns_nonoverlap_up<-c(gns_nonoverlap_up, 
                             as.character(gn_temp[[1]][1]))
                     }
                 
                     datNonOverLapUP<-nonOverlapRNAseq_solo(
                         gnsNonOverlap = as.character(gns_nonoverlap_up),
                         SVID = svID,
                         RNASeqData = RNASeqData,
                         pattern_Proband = pattern_Proband)
                     
                 }
                 else if (length(grep(
                 "\\(",as.character(nearestUPGenes[ll]))) >=1
                 ){
             #print("2")
                         gnsNonOverlapUP<-strsplit(as.character(nearestUPGenes[ll]),split="\\(")[[1]][1]
                         
                         datNonOverLapUP<-nonOverlapRNAseq_solo(
                             gnsNonOverlap = as.character(gnsNonOverlapUP),
                             SVID = svID,RNASeqData = RNASeqData,
                             pattern_Proband=pattern_Proband)
                         
                     }
                 else{
                     #print(paste("NonOverLapUPSVID:",svID))
                         datNonOverLapUP<-data.frame(
                             SVID = svID, ProbandTPM="-"
                         )
         }
         datanonOverLapUP[ll,]<-c(
             as.character(datNonOverLapUP$SVID), 
             Proband_Upstream_nonOverlapGeneExpression_TPM = as.character(datNonOverLapUP$ProbandTPM))
                 
     }
     
   ##Extracting NonOverlapped Down Stream Genes
     nearestDNGenes<-smapdata$Downstream_nonOverlapGenes_dist_kb
     datanonOverLapDN<-data.frame(matrix(nrow = nrow(smapdata), ncol = 2))
     names(datanonOverLapDN)<-c("SVID","NonOverlapDNProbandTPM")
     print("###NonOverlapDNStreamGenes###") 
     for(nn in 1:length(nearestDNGenes))
     {
         #for(nn in 1:10){
         datNonOverLapDN<-data.frame()
         # print(paste("llDN:",ll,sep=""))
         svID<-as.character(SVID[nn])
         if(length(grep(";",nearestDNGenes[nn]))>=1){
             st1 <- strsplit(as.character(nearestDNGenes[nn]), split = ";")
             sttemp<-as.character(st1[[1]])
             #print("1")
             gns_nonoverlap_dn<-c()
             for (mm in 1:length(sttemp)){
                 gn_temp <- strsplit(sttemp[mm],split="\\(")
                 gns_nonoverlap_dn <- c(
                         gns_nonoverlap_dn,as.character(gn_temp[[1]][1])
                         )
             }
             datNonOverLapDN<-nonOverlapRNAseq_solo(
                     gnsNonOverlap = as.character(gns_nonoverlap_dn),
                     SVID = svID,RNASeqData = RNASeqData,
                     pattern_Proband = pattern_Proband)
             
         }
         else if (length(grep("\\(",as.character(nearestDNGenes[nn]))) >= 1){
             # print("2")
             gnsNonOverlapDN<-strsplit(as.character(
                 nearestDNGenes[nn]),split="\\(")[[1]][1]
                 datNonOverLapDN<-nonOverlapRNAseq_solo(
                         gnsNonOverlap = as.character(gnsNonOverlapDN),
                         SVID = svID,
                         RNASeqData = RNASeqData,
                         pattern_Proband = pattern_Proband)
                 
         }
         else{
             #print(paste("NonOverLapDNSVID:",svID))
             #print ("SVID")
             datNonOverLapDN<-data.frame(
             SVID = svID,ProbandTPM="-")
         }
             datanonOverLapDN[nn,]<-c(as.character(datNonOverLapDN$SVID),
                 Proband_Downstream_nonOverlapGeneExpression_TPM = as.character(datNonOverLapDN$ProbandTPM))
             }
   
             dataFinal<-data.frame(smapdata, 
                 OverlapProbandEXP = dataOverLap[,2],
                 NonOverlapUPprobandEXP = datanonOverLapUP[,2],
                 NonOverlapDNprobandEXP = datanonOverLapDN[,2])
     return(dataFinal)
 
 }