Browse code

All Files in new Folder

Bhattacharya authored on 20/08/2021 20:18:52
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,1365 @@
1
#' Combining the RNAseq reads of family members in a
#' single file.
#'
#' Reads every file in \code{RNASeqDir} whose name matches
#' \code{*.genes.results}, takes the \code{gene_id} and \code{TPM} columns
#' of each file and combines them into one table with one row per gene and
#' one TPM column per file. The version suffix of each gene ID (everything
#' after the first dot) is removed and the ID is mapped to a gene symbol
#' with \code{mapIds} on \code{org.Hs.eg.db}.
#'
#' Values are aligned by \code{gene_id}, so the input files do not need to
#' list the genes in the same order; a gene missing from a file gets
#' \code{NA} in that file's column.
#'
#' @param RNASeqDir  character. Directory containing RNAseq reads.
#' @param returnMethod  character. Method of returning Data; either
#' \code{"Text"} (default) or \code{"dataFrame"}.
#' @param outpath  character. Contains file path if Method of return is
#' chosen as Text. If empty, the file is written relative to the working
#' directory.
#' @param outFileName  character. Output file name, without the
#' \code{.csv} extension.
#' @return For \code{returnMethod = "dataFrame"}, a data frame with the
#' columns \code{GeneName}, \code{GeneID} and one TPM column per input
#' file. For \code{returnMethod = "Text"}, the same table is written as a
#' csv file and \code{NULL} is returned invisibly.
#' @examples
#' \dontrun{
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' }
#' @importFrom stats na.omit
#' @importFrom AnnotationDbi mapIds
#' @import org.Hs.eg.db
#' @export
RNAseqcombine <- function(RNASeqDir, returnMethod = c("Text", "dataFrame"),
                        outpath = "", outFileName = "") {
    # Use only the first element: the untouched default is a length-two
    # vector, and a length-two `if` condition is an error in R >= 4.2.
    returnMethod <- as.character(returnMethod)[1]
    if (is.na(returnMethod) ||
        !(returnMethod %in% c("Text", "dataFrame"))) {
        stop("Invalid ReturnMethod")
    }

    resultFiles <- list.files(path = RNASeqDir,
        pattern = "*.genes.results", full.names = FALSE)
    if (length(resultFiles) == 0) {
        stop("No '.genes.results' files found in: ", RNASeqDir)
    }

    tables <- lapply(resultFiles, function(fileName) {
        utils::read.table(file.path(RNASeqDir, fileName),
            sep = "\t", header = TRUE)
    })

    # Column names are the file names up to the ".genes.results" suffix.
    sampleNames <- vapply(
        strsplit(resultFiles, split = ".genes.results"),
        function(parts) parts[1], character(1)
    )

    # Union of the gene IDs of all files, in order of first appearance.
    geneIds <- unique(unlist(lapply(tables, function(tab) {
        as.character(tab$gene_id)
    })))

    # Align the TPM values of every file on the common gene list by ID
    # rather than by row position.
    tpmColumns <- lapply(tables, function(tab) {
        as.numeric(tab$TPM)[match(geneIds, as.character(tab$gene_id))]
    })
    data1 <- as.data.frame(do.call(cbind, tpmColumns))
    names(data1) <- sampleNames

    # Drop the version suffix (e.g. "ENSG00000000003.14") before the lookup.
    genes <- sub("[.].*$", "", geneIds)
    symbolMap <- mapIds(org.Hs.eg.db, genes, "SYMBOL", "ENSEMBL")

    # One symbol per gene; "-" when the gene is absent from the mapping.
    hit <- match(genes, as.character(names(symbolMap)))
    geneSymbols <- ifelse(is.na(hit), "-", as.character(symbolMap)[hit])

    RNASeqDat <- data.frame(GeneName = as.character(geneSymbols),
        GeneID = as.character(genes), data1)

    if (returnMethod == "Text") {
        fname <- paste0(outFileName, ".csv")
        # An empty outpath must not turn into the root directory "/".
        target <- if (nzchar(outpath)) file.path(outpath, fname) else fname
        utils::write.csv(RNASeqDat, target, row.names = FALSE)
        return(invisible(NULL))
    }
    RNASeqDat
}
98
+
99
#' Extract Read counts for genes that overlap SVs.
#'
#' For every gene in \code{gnsOverlap} the rows of \code{RNASeqData} whose
#' \code{GeneName} equals the gene are looked up, and the expression of
#' the proband, father and mother samples is reported. Sample columns are
#' found by matching the corresponding pattern against the column names of
#' \code{RNASeqData}. If a gene has several rows, the column means are
#' used; if a family member has several sample columns, their values are
#' joined with \code{":"}.
#'
#' Each expression string has the form \code{GENE(value)}, with several
#' genes separated by \code{";"}. A gene that is not present in
#' \code{RNASeqData} is reported as \code{GENE(-)}. The whole string is
#' \code{"-"} when no pattern was given for that family member, when the
#' pattern matches no column, or when \code{gnsOverlap} is empty.
#'
#' @param gnsOverlap  character. genes that overlap SV.
#' @param SVID  character. ID of the SVs.
#' @param RNASeqData  data frame. Expression of the genes, with a
#' \code{GeneName} column and one column per sample (as built by
#' \code{RNAseqcombine} in the example below).
#' @param pattern_Proband  character. Pattern to identify the proband reads.
#' @param pattern_Father  character. Pattern to identify the father reads.
#' @param pattern_Mother  character. Pattern to identify the mother reads.
#' @return One-row data frame with the columns \code{SVID},
#' \code{Probandexpression}, \code{Fatherexpression} and
#' \code{Motherexpression}.
#' @examples
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' gnsOverlap <- c("AGL")
#' SVID = 397
#' datgnovrlap <- OverlapRNAseq(gnsOverlap = gnsOverlap,
#' SVID = SVID, RNASeqData = datRNASeq,
#' pattern_Proband = "*_P_*")
#' @importFrom stats na.omit
#' @export
OverlapRNAseq <- function(gnsOverlap, SVID, RNASeqData,
                pattern_Proband = NA, pattern_Mother = NA,
                pattern_Father = NA) {
    columnNames <- names(RNASeqData)
    geneNames <- as.character(RNASeqData$GeneName)
    genes <- as.character(gnsOverlap)

    # Indices of the columns whose name matches `pattern`; integer(0) when
    # no pattern was supplied or when nothing matches.
    sampleColumns <- function(pattern) {
        pattern <- unique(as.character(pattern))
        pattern <- pattern[!is.na(pattern)]
        if (length(pattern) == 0) {
            return(integer(0))
        }
        sort(unique(unlist(lapply(pattern, grep, x = columnNames))))
    }

    # Expression of one gene in the given sample columns: mean over all
    # rows carrying that gene name, one value per column joined by ":".
    # Returns "-" when the gene has no row in RNASeqData.
    geneReads <- function(gene, columns) {
        rows <- which(geneNames == gene)
        if (length(rows) == 0) {
            return("-")
        }
        values <- vapply(columns, function(j) {
            mean(RNASeqData[[j]][rows])
        }, numeric(1))
        paste(values, collapse = ":")
    }

    # "GENE1(reads);GENE2(reads)..." for one family member, or "-" when
    # there are no sample columns or no genes to report.
    familyTPM <- function(columns) {
        if (length(columns) == 0 || length(genes) == 0) {
            return("-")
        }
        reads <- vapply(genes, geneReads, character(1),
            columns = columns, USE.NAMES = FALSE)
        paste0(genes, "(", reads, ")", collapse = ";")
    }

    data.frame(
        SVID = SVID,
        Probandexpression = familyTPM(sampleColumns(pattern_Proband)),
        Fatherexpression = familyTPM(sampleColumns(pattern_Father)),
        Motherexpression = familyTPM(sampleColumns(pattern_Mother))
    )
}
604
+
605
+
606
+#' Extract Read counts for genes that are near SVs.
607
+#'
608
+#' @param gnsNonOverlap  character. genes that are upstream
609
+#' and/or downstream of SV.
610
+#' @param SVID  character. ID of the SVs.
611
+#' @param RNASeqData  character. Expression of the genes.
612
+#' @param pattern_Proband  character. Pattern to identify the proband reads.
613
+#' @param pattern_Father  character. Pattern to identify the father reads.
614
+#' @param pattern_Mother  character. Pattern to identify the mother reads.
615
+#' @return Text or Dataframe containing TPM read counts of genes in the family.
616
+#' @examples
617
+#' RNASeqDir = system.file("extdata", package="nanotatoR")
618
+#' returnMethod="dataFrame"
619
+#' datRNASeq <- RNAseqcombine(RNASeqDir = RNASeqDir,
620
+#' returnMethod = returnMethod)
621
+#' gnsNonOverlap <- c("DDX11L1", "MIR1302-2HG", "OR4G4P")
622
+#' SVID = 397
623
+#' datgnnonovrlap <- nonOverlapRNAseq(gnsNonOverlap = gnsNonOverlap,
624
+#' SVID = SVID, RNASeqData = datRNASeq,
625
+#' pattern_Proband = "*_P_*")
626
+#' @importFrom stats na.omit
627
+#' @export
628
+nonOverlapRNAseq<-function(gnsNonOverlap,SVID,RNASeqData,
629
+                pattern_Proband=NA,pattern_Mother=NA,
630
+                pattern_Father=NA){
631
+  ##Biomart annotation
632
+  ###Checking if the input is empty; else if not empty add
633
+  ###expression values for each genes
634
+    datGeneInfo<-data.frame()
635
+    SVID=SVID
636
+    ###Extracting the index for the the parents
637
+    if(is.na(unique(as.character(pattern_Father))) == FALSE){
638
+        fatherInd<-grep(unique(as.character(pattern_Father)),names(RNASeqData))
639
+    } else{fatherInd<- NA}
640
+    if(is.na(unique(as.character(pattern_Mother))) == FALSE){
641
+        motherInd<-grep(unique(as.character(pattern_Mother)),names(RNASeqData))
642
+    } else{motherInd<- NA}
643
+    if(is.na(unique(as.character(pattern_Proband))) == FALSE){
644
+        probandInd<-grep(unique(as.character(pattern_Proband)),names(RNASeqData))
645
+    } else{probandInd<- NA}
646
+
647
+    ##Checking for sibling
648
+    'if(is.na(pattern_Sibling)==FALSE){
649
+      siblingInd<-grep(pattern_Sibling,names(RNASeqData))
650
+    }
651
+    else{
652
+      siblingInd<-NA
653
+    }'
654
+
655
+    gene<-c()
656
+    gnsname<-as.character(RNASeqData$GeneName)
657
+    pasgnsname<-pasgnovlap<-paste("^",as.character(gnsname),"$",sep="")
658
+    'nonoverlap_ensemblgenes = select(EnsDb.Hsapiens.v79, gnsNonOverlap,
659
+                c("GENEID","GENENAME"), "SYMBOL")
660
+    gnsnonOverlapID<-as.character(nonoverlap_ensemblgenes$GENEID)'
661
+    ###Extracting Reads
662
+    ###
663
+    ###Genes Names Extraction
664
+    gnsnonOverlapID<- as.character(gnsNonOverlap)
665
+    if(length(gnsnonOverlapID)>1){
666
+      #datGeneInfoTemp<-data.frame()
667
+      fatherReads<-c()
668
+      motherReads<-c()
669
+      probandReads<-c()
670
+      #siblingReads<-c()
671
+
672
+      for (ki in 1:length(gnsnonOverlapID)){
673
+       pasgnnonovlap<-paste("^",as.character(gnsnonOverlapID[ki]),"$",sep="")
674
+        gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
675
+        dat_temp<-RNASeqData[gg,]
676
+        if(nrow(dat_temp)>1){
677
+          dat_temp_1<-apply(dat_temp[,3:ncol(dat_temp)],2,mean)
678
+          #dat_temp_1<-data.frame(dat_temp_1)
679
+          dat_temp1<-dat_temp[1,]
680
+          dat_temp1[,3:ncol(dat_temp)]<-dat_temp_1
681
+           #print(dim(dat_temp1))
682
+          fathercount<-c();mothercount<-c();probandcount<-c();siblingcount<-c()
683
+          if(is.na(fatherInd[1])==FALSE){
684
+          if(length(fatherInd)>1){
685
+          for(j in fatherInd){
686
+            fathercount<-c(fathercount,dat_temp1[,j])
687
+            }
688
+            fatherReads<-c(fatherReads,paste(fathercount,collapse=":"))
689
+            }
690
+            else if(length(fatherInd)==1){
691
+            fatherReads<-c(fatherReads,dat_temp1[,fatherInd])
692
+            }
693
+          else{
694
+            fatherReads<-c(fatherReads,0)
695
+            }
696
+            }
697
+            else{
698
+            fatherReads<-c(fatherReads,"-")
699
+          }
700
+          if(is.na(motherInd[1])==FALSE){
701
+          if(length(motherInd)>1){
702
+          for(j in motherInd){
703
+            mothercount<-c(mothercount,dat_temp1[,j])
704
+            }
705
+            motherReads<-c(motherReads,paste(mothercount,collapse=":"))
706
+            }
707
+            else if(length(motherInd)==1){
708
+            motherReads<-c(motherReads,dat_temp1[,motherInd])
709
+            }
710
+          else{
711
+            motherReads<-c(motherReads,0)
712
+            }
713
+            } else{
714
+                motherReads<-c(motherReads, "-")
715
+            }
716
+          if(is.na(probandInd[1])==FALSE){
717
+          if(length(probandInd)>1){
718
+          for(j in probandInd){
719
+            probandcount<-c(probandcount,dat_temp1[,j])
720
+            }
721
+            probandReads<-c(probandReads,paste(probandcount,collapse=":"))
722
+            }
723
+            else if(length(probandInd)==1){
724
+            probandReads<-c(probandReads,dat_temp1[,probandInd])
725
+            }
726
+          else{
727
+            probandReads<-c(probandReads,0)
728
+            }
729
+            } else{
730
+                probandReads<-c(probandReads, "-")
731
+            }
732
+          'if(is.na(siblingInd[1])==FALSE){
733
+            if(length(siblingInd)>1){
734
+            for(j in siblingInd){
735
+            siblingcount<-c(siblingcount,dat_temp1[,j])
736
+            }
737
+            siblingReads<-c(siblingReads,paste(siblingcount,collapse=":"))
738
+            }
739
+            else if(length(siblingInd)==1){
740
+              siblingReads<-c(siblingReads,dat_temp1[,siblingInd])
741
+            }
742
+            else{
743
+              siblingReads<-c(siblingReads,0)
744
+            }
745
+          }
746
+          else{
747
+            siblingReads<-c(siblingReads,"-")
748
+          }'
749
+        }
750
+        else if (nrow(dat_temp)==1) {
751
+          #print(dim(dat_temp1))
752
+          fathercount<-c();mothercount<-c();probandcount<-c();
753
+          if(is.na(fatherInd[1])==FALSE){
754
+          if(length(fatherInd)>1){
755
+          for(j in fatherInd){
756
+            fathercount<-c(fathercount,dat_temp[,j])
757
+            }
758
+            fatherReads<-c(fatherReads,paste(fathercount,collapse=":"))
759
+            }
760
+            else if(length(fatherInd)==1){
761
+            fatherReads<-c(fatherReads,dat_temp[,fatherInd])
762
+            }
763
+          else{
764
+            fatherReads<-c(fatherReads,0)
765
+            }
766
+            }
767
+            else{
768
+            fatherReads<-c(fatherReads,"-")
769
+          }
770
+
771
+          if(is.na(motherInd[1])==FALSE){
772
+          if(length(motherInd)>1){
773
+          for(j in motherInd){
774
+            mothercount<-c(mothercount,dat_temp[,j])
775
+            }
776
+            motherReads<-c(motherReads,paste(mothercount,collapse=":"))
777
+            }
778
+            else if(length(motherInd)==1){
779
+            motherReads<-c(motherReads,dat_temp[,motherInd])
780
+            }
781
+          else{
782
+            motherReads<-c(motherReads,0)
783
+            }
784
+         }
785
+            else{
786
+            motherReads<-c(motherReads,"-")
787
+          }
788
+           if(is.na(probandInd[1])==FALSE){
789
+          if(length(probandInd)>1){
790
+          for(j in probandInd){
791
+            probandcount<-c(probandcount,dat_temp[,j])
792
+            }
793
+            probandReads<-c(probandReads,paste(probandcount,collapse=":"))
794
+            }
795
+            else if(length(probandInd)==1){
796
+            probandReads<-c(probandReads,dat_temp[,probandInd])
797
+            }
798
+          else{
799
+            probandReads<-c(probandReads,0)
800
+            }
801
+           }
802
+            else{
803
+            probandReads <- c(probandReads,"-")
804
+          }
805
+          'if(is.na(siblingInd[1])==FALSE){
806
+            if(length(siblingInd)>1){
807
+            for(j in siblingInd){
808
+            siblingcount<-c(siblingcount,dat_temp[,j])
809
+            }
810
+            siblingReads<-c(siblingReads,paste(siblingcount,collapse=":"))
811
+            }
812
+            else if(length(siblingInd)==1){
813
+              siblingReads<-c(siblingReads,dat_temp[,siblingInd])
814
+            }
815
+            else{
816
+              siblingReads<-c(siblingReads,0)
817
+            }
818
+          }
819
+          else{
820
+            siblingReads<-c(siblingReads,"-")
821
+          }'
822
+        }
823
+        else{
824
+          fatherReads<-c(fatherReads,"-")
825
+          motherReads<-c(motherReads,"-")
826
+          probandReads<-c(probandReads,"-")
827
+          'if(is.na(siblingInd[1])==FALSE){
828
+            siblingReads<-c(siblingReads,"-")
829
+          }
830
+          else{
831
+            siblingReads<-c(siblingReads,"-")
832
+          }'
833
+        }
834
+        gene<-c(gene,as.character(gnsnonOverlapID[ki]))
835
+    }
836
+        if(is.na(probandInd[1])==FALSE){
837
+        ProbandGenes<-c()
838
+
839
+        for(ii in 1:length(gene)){
840
+        pasgene<-paste(gene[ii],"(",probandReads[ii],")",sep="")
841
+        ProbandGenes<-c(ProbandGenes,pasgene)
842
+        }
843
+        ProbandTPM<-paste(ProbandGenes,collapse=";")
844
+        } else{
845
+               ProbandTPM<-"-"
846
+            }
847
+         if(is.na(fatherInd[1])==FALSE){
848
+        FatherGenes<-c()
849
+        for(ii in 1:length(gene)){
850
+        pasgene<-paste(gene[ii],"(",fatherReads[ii],")",sep="")
851
+        FatherGenes<-c(FatherGenes,pasgene)
852
+        }
853
+        FatherTPM<-paste(FatherGenes,collapse=";")
854
+        }else{
855
+               FatherTPM<-"-"
856
+            }
857
+        if(is.na(motherInd[1])==FALSE){
858
+        MotherGenes<-c()
859
+        for(ii in 1:length(gene)){
860
+        pasgene<-paste(gene[ii],"(",motherReads[ii],")",sep="")
861
+        MotherGenes<-c(MotherGenes,pasgene)
862
+        }
863
+        MotherTPM<-paste(MotherGenes,collapse=";")
864
+        }else{
865
+               MotherTPM<-"-"
866
+            }
867
+
868
+        'if(is.na(siblingInd[1])==FALSE){
869
+        siblingGenes<-c()
870
+        for(ii in 1:length(gene)){
871
+        pasgene<-paste(gene[ii],"(",siblingReads[ii],")",sep="")
872
+        siblingGenes<-c(siblingGenes,pasgene)
873
+        }
874
+        SiblingTPM<-paste(siblingGenes,collapse=";")
875
+        }
876
+        else{
877
+               SiblingTPM<-"-"
878
+            }'
879
+        datGeneInfo<-data.frame(SVID=SVID,ProbandTPM=ProbandTPM,
880
+                FatherTPM=FatherTPM,MotherTPM=MotherTPM)
881
+    }
882
+    else if(length(gnsnonOverlapID)==1){
883
+    pasgnnonovlap<-paste("^",as.character(gnsnonOverlapID),"$",sep="")
884
+    gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
885
+      #gg<-grep(pasgnnonovlap,gnsname,fixed=TRUE)
886
+      dat_temp<-RNASeqData[gg,]
887
+
888
+      if(nrow(dat_temp)>1){
889
+          dat_temp_1<-apply(dat_temp[,3:ncol(dat_temp)],2,mean)
890
+          #dat_temp_1<-data.frame(dat_temp_1)
891
+          dat_temp1<-dat_temp[1,]
892
+          dat_temp1[,3:ncol(dat_temp)]<-dat_temp_1
893
+           #print(dim(dat_temp1))
894
+          fathercount<-c();mothercount<-c();probandcount<-c();
895
+          if(is.na(fatherInd[1])==FALSE){
896
+          if(length(fatherInd)>1){
897
+          for(j in fatherInd){
898
+            fathercount<-c(fathercount,dat_temp1[,j])
899
+            }
900
+            fatherReads<-paste(fathercount,collapse=":")
901
+            }
902
+            else if(length(fatherInd)==1){
903
+            fatherReads<-dat_temp1[,fatherInd]
904
+            }
905
+          else{
906
+            fatherReads<-0
907
+            }
908
+             }
909
+          else{
910
+            fatherReads<- "-"
911
+          }
912
+          if(is.na(motherInd[1])==FALSE){
913
+          if(length(motherInd)>1){
914
+          for(j in motherInd){
915
+            mothercount<-c(mothercount,dat_temp1[,j])
916
+            }
917
+            motherReads<-paste(mothercount,collapse=":")
918
+            }
919
+            else if(length(motherInd)==1){
920
+            motherReads<-dat_temp1[,motherInd]
921
+            }
922
+          else{
923
+            motherReads<-0
924
+            }
925
+             }
926
+          else{
927
+            motherReads<- "-"
928
+          }
929
+          if(is.na(probandInd[1])==FALSE){
930
+          if(length(probandInd)>1){
931
+          for(j in probandInd){
932
+            probandcount<-c(probandcount,dat_temp1[,j])
933
+            }
934
+            probandReads<-paste(probandcount,collapse=":")
935
+            }
936
+            else if(length(probandInd)==1){
937
+            probandReads<-dat_temp1[,probandInd]
938
+            }
939
+          else{
940
+            probandReads<-0
941
+            }
942
+             }
943
+          else{
944
+            probandReads<- "-"
945
+          }
946
+
947
+          'if(is.na(siblingInd[1])==FALSE){
948
+            if(length(siblingInd)>1){
949
+            for(j in siblingInd){
950
+            siblingcount<-c(siblingcount,dat_temp1[,j])
951
+            }
952
+            siblingReads<-paste(siblingcount,collapse=":")
953
+            }
954
+            else if(length(siblingInd)==1){
955
+              siblingReads<-dat_temp1[,siblingInd]
956
+            }
957
+            else{
958
+              siblingReads <- 0
959
+            }
960
+          }
961
+          else{
962
+            siblingReads<- "-"
963
+          }'
964
+        #motherReads<-dat_temp1[,motherInd]
965
+        #probandReads<-dat_temp1[,probandInd]
966
+        #if(is.na(siblingInd[1])==TRUE){
967
+        #siblingReads<-"-"
968
+        #}
969
+        #else{
970
+        #siblingReads<-dat_temp1[,siblingInd]
971
+        #}
972
+        }
973
+      else if (nrow(dat_temp)==1){
974
+
975
+         #print(dim(dat_temp1))
976
+          fathercount<-c();mothercount<-c();probandcount<-c();
977
+          if(is.na(fatherInd[1])==FALSE){
978
+          if(length(fatherInd)>1){
979
+          for(j in fatherInd){
980
+            fathercount<-c(fathercount,dat_temp[,j])
981
+            }
982
+            fatherReads<-paste(fathercount,collapse=":")
983
+            }
984
+            else if(length(fatherInd)==1){
985
+            fatherReads<-dat_temp[,fatherInd]
986
+            }
987
+          else{
988
+            fatherReads<-0
989
+            }
990
+            }
991
+          else{
992
+            fatherReads<- "-"
993
+          }
994
+            if(is.na(motherInd[1])==FALSE){
995
+          if(length(motherInd)>1){
996
+          for(j in motherInd){
997
+            mothercount<-c(mothercount,dat_temp[,j])
998
+            }
999
+            motherReads<-paste(mothercount,collapse=":")
1000
+            }
1001
+            else if(length(motherInd)==1){
1002
+            motherReads<-dat_temp[,motherInd]
1003
+            }
1004
+          else{
1005
+            motherReads<-0
1006
+            }
1007
+            }
1008
+          else{
1009
+            motherReads<- "-"
1010
+          }
1011
+          if(is.na(probandInd)==FALSE){
1012
+          if(length(probandInd)>1){
1013
+          for(j in probandInd){
1014
+            probandcount<-c(probandcount,dat_temp[,j])
1015
+            }
1016
+            probandReads<-paste(probandcount,collapse=":")
1017
+            }
1018
+            else if(length(probandInd)==1){
1019
+            probandReads<-dat_temp[,probandInd]
1020
+            }
1021
+          else{
1022
+            probandReads<-0
1023
+            }
1024
+            }
1025
+          else{
1026
+            probandReads<- "-"
1027
+          }
1028
+          'if(is.na(siblingInd[1])==FALSE){
1029
+            if(length(siblingInd)>1){
1030
+            for(j in siblingInd){
1031
+            siblingcount<-c(siblingcount,dat_temp[,j])
1032
+            }
1033
+            siblingReads<-paste(siblingcount,collapse=":")
1034
+            }
1035
+            else if(length(siblingInd)==1){
1036
+              siblingReads<-dat_temp[,siblingInd]
1037
+            }
1038
+            else{
1039
+              siblingReads<-0
1040
+            }
1041
+          }
1042
+          else{
1043
+            siblingReads<-"-"
1044
+          }'
1045
+      }
1046
+      else{
1047
+        fatherReads<-"-"
1048
+        motherReads<-"-"
1049
+        probandReads<-"-"
1050
+        'if(is.na(siblingInd[1])==TRUE){
1051
+        siblingReads<-"-"
1052
+        }
1053
+        else{
1054
+        siblingReads<-"-"
1055
+        }'
1056
+      }
1057
+        gene<-c(gene,as.character(gnsnonOverlapID))
1058
+        if(is.na(probandInd[1])==FALSE){
1059
+        ProbandGenes<-c()
1060
+        ProbandGenes<-paste(gene,"(",probandReads,")",sep="")
1061
+        #ProbandGenes<-c(ProbandGenes,pasgene)
1062
+        #}
1063
+        ProbandTPM<-as.character(ProbandGenes)
1064
+        }else{
1065
+                ProbandTPM<-"-"
1066
+        }
1067
+        if(is.na(fatherInd[1])==FALSE){
1068
+        FatherGenes<-c()
1069
+        FatherGenes<-paste(gene,"(",fatherReads,")",sep="")
1070
+        FatherTPM<-as.character(FatherGenes)
1071
+        }else{
1072
+                FatherTPM <- "-"
1073
+        }
1074
+        if(is.na(motherInd[1])==FALSE){
1075
+        MotherGenes<-c()
1076
+        MotherGenes<-paste(gene,"(",motherReads,")",sep="")
1077
+        MotherTPM<-as.character(MotherGenes)
1078
+        }else{
1079
+                MotherTPM <- "-"
1080
+        }
1081
+
1082
+        'if(is.na(siblingInd[1])==FALSE){
1083
+        siblingGenes<-paste(gene,"(",siblingReads,")",sep="")
1084
+        #siblingGenes<-c(siblingGenes,pasgene)
1085
+        #}
1086
+        SiblingTPM<-as.character(siblingGenes)
1087
+        }
1088
+        else{
1089
+                SiblingTPM<-"-"
1090
+        }'
1091
+
1092
+        datGeneInfo<-data.frame(SVID=SVID,ProbandTPM=ProbandTPM,
1093
+                FatherTPM=FatherTPM,MotherTPM=MotherTPM)
1094
+    }
1095
+    else{
1096
+        datGeneInfo<-data.frame(SVID=SVID,ProbandTPM="-",
1097
+                FatherTPM="-",MotherTPM="-")
1098
+
1099
+    }
1100
+
1101
+  return(datGeneInfo)
1102
+}
1103
+
1104
#' Extract read counts for genes that are near
#' or overlapping SVs.
#'
#' For every SV in the smap data, the gene names listed in the
#' \code{OverlapGenes_strand_perc}, \code{Upstream_nonOverlapGenes_dist_kb}
#' and \code{Downstream_nonOverlapGenes_dist_kb} columns are parsed
#' (entries are separated by ";" and the gene name is the text before the
#' first "(") and looked up in the RNASeq data through \code{OverlapRNAseq}
#' and \code{nonOverlapRNAseq}. SVs without any gene in a column get "-".
#'
#' @param input_fmt_SV  character. Input format of the SV (smap) data.
#' "Text" or "dataFrame". Only the first element is used.
#' @param input_fmt_RNASeq  character. input format of RNASEQ data.
#' "Text" or "dataFrame". Only the first element is used.
#' @param smapdata  dataframe. smap data in dataframe format. Used when
#' input_fmt_SV is "dataFrame".
#' @param smappath  character. smap path. Used when input_fmt_SV is "Text".
#' @param RNASeqPATH  character. RNASEQ path (read with read.csv). Used when
#' input_fmt_RNASeq is "Text".
#' @param outputfmt  character. Output format choice dataframe or text.
#' Currently not used by the function; a dataframe is always returned.
#' @param RNASeqData  dataframe. Expression of the genes. Used when
#' input_fmt_RNASeq is "dataFrame".
#' @param pattern_Proband  character. Pattern to identify the proband reads.
#' @param pattern_Father  character. Pattern to identify the father reads.
#' @param pattern_Mother  character. Pattern to identify the mother reads.
#' @param EnzymeType  character. "SVMerge" selects the \code{SVIndex} column
#' (and \code{readSMap} for text input); any other value, e.g. "SE", selects
#' \code{SmapEntryID} (and \code{readSMap_DLE}). Only the first element is
#' used.
#' @return Dataframe containing the smap data followed by nine columns with
#' the proband, father and mother TPM strings for the overlapping
#' (\code{Overlap*TPM}), upstream (\code{NonOverlapUP*TPM}) and downstream
#' (\code{NonOverlapDN*TPM}) genes.
#' @examples
#' \dontrun{
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' smapName="NA12878_DLE1_VAP_solo5.smap"
#' smap = system.file("extdata", smapName, package="nanotatoR")
#' datcomp<-overlapnearestgeneSearch(smap = smap,
#'    bed=bedFile, inputfmtBed = "BED", outpath,
#'    n = 3, returnMethod_bedcomp = c("dataFrame"),
#'    input_fmt_SV = "Text",
#'    EnzymeType = "SVMerge",
#'    bperrorindel = 3000,
#'    bperrorinvtrans = 50000)
#' datRNASeq1 <- SVexpression_duo_trio(
#'     input_fmt_SV=c("dataFrame"),
#'     smapdata = datcomp,
#'     input_fmt_RNASeq=c("dataFrame"),
#'     RNASeqData = datRNASeq,
#'     outputfmt=c("datFrame"),
#'     pattern_Proband = "*_P_*", EnzymeType = c("SVMerge"))
#'}
#' @importFrom stats na.omit
#' @export
SVexpression_duo_trio <- function(input_fmt_SV = c("Text", "dataFrame"),
        smapdata, smappath,
        input_fmt_RNASeq = c("Text", "dataFrame"),
        RNASeqData, RNASeqPATH, outputfmt = c("Text", "datFrame"),
        pattern_Proband = NA, pattern_Mother = NA, pattern_Father = NA,
        EnzymeType = c("SVMerge", "SE")) {
    # The defaults are choice vectors, but `if` needs a scalar condition
    # (a length > 1 condition is an error in recent R), so use the first
    # element of each selector.
    input_fmt_RNASeq <- input_fmt_RNASeq[1]
    input_fmt_SV <- input_fmt_SV[1]
    EnzymeType <- EnzymeType[1]

    ### RNASEQ analysis data
    if (input_fmt_RNASeq == "Text") {
        RNASeqData <- read.csv(RNASeqPATH)
    } else if (input_fmt_RNASeq != "dataFrame") {
        stop("Input format for RNASeq Data Incorrect")
    }

    ### SV (smap) data
    if (input_fmt_SV == "Text") {
        if (EnzymeType == "SVMerge") {
            smapdata <- readSMap(smappath, input_fmt_smap = "Text")
        } else {
            smapdata <- readSMap_DLE(smappath, input_fmt_smap = "Text")
        }
    } else if (input_fmt_SV != "dataFrame") {
        stop("Input format for SMAP Incorrect")
    }

    # Pick the SV identifier column that matches the enzyme type. The other
    # identifier column is accepted as a fallback so that inputs carrying
    # only one of the two columns keep working (previously SVIndex was
    # always used, which failed for data that only has SmapEntryID).
    if (EnzymeType == "SVMerge") {
        idColumns <- c("SVIndex", "SmapEntryID")
    } else {
        idColumns <- c("SmapEntryID", "SVIndex")
    }
    idColumns <- idColumns[idColumns %in% names(smapdata)]
    if (length(idColumns) == 0) {
        stop("smapdata has neither an SVIndex nor a SmapEntryID column")
    }
    SVID <- smapdata[[idColumns[1]]]
    nSV <- nrow(smapdata)

    # Returns the requested annotation column, or all-NA when it is absent
    # so that every SV simply gets the "-" placeholder.
    annotationColumn <- function(columnName) {
        values <- smapdata[[columnName]]
        if (is.null(values)) {
            values <- rep(NA_character_, nSV)
        }
        values
    }

    # Splits one annotation entry such as "GENE1(+:10);GENE2(-:5)" into
    # gene names (text before the first "("). Returns character(0) when the
    # entry contains neither ";" nor "(" (i.e. no gene is listed).
    extractGeneNames <- function(entry) {
        entry <- as.character(entry)
        hasGenes <- !is.na(entry) &&
            (grepl(";", entry, fixed = TRUE) ||
                grepl("(", entry, fixed = TRUE))
        if (!hasGenes) {
            return(character(0))
        }
        pieces <- strsplit(entry, split = ";", fixed = TRUE)[[1]]
        vapply(pieces, function(piece) {
            strsplit(piece, split = "(", fixed = TRUE)[[1]][1]
        }, character(1), USE.NAMES = FALSE)
    }

    # Expression lookups; both helpers are defined elsewhere in the package.
    overlapLookup <- function(genes, svID) {
        OverlapRNAseq(gnsOverlap = genes,
            SVID = svID, RNASeqData = RNASeqData,
            pattern_Proband = pattern_Proband,
            pattern_Mother = pattern_Mother,
            pattern_Father = pattern_Father)
    }
    nonOverlapLookup <- function(genes, svID) {
        nonOverlapRNAseq(gnsNonOverlap = genes,
            SVID = svID, RNASeqData = RNASeqData,
            pattern_Proband = pattern_Proband,
            pattern_Mother = pattern_Mother,
            pattern_Father = pattern_Father)
    }

    # Builds the three TPM columns (proband, father, mother) for one
    # annotation column. `valueCols` names the columns read from the lookup
    # result; `outNames` names the columns of the returned data frame.
    collectExpression <- function(entries, lookup, valueCols, outNames,
            showProgress = FALSE) {
        out <- data.frame(matrix(nrow = nSV, ncol = 4))
        names(out) <- c("SVID", outNames)
        for (idx in seq_len(nSV)) {
            if (showProgress) {
                print(paste("kk:", idx, sep = ""))
            }
            svID <- as.character(SVID[idx])
            genes <- extractGeneNames(entries[idx])
            if (length(genes) > 0) {
                res <- lookup(genes, svID)
                rowValues <- c(as.character(res$SVID),
                    as.character(res[[valueCols[1]]]),
                    as.character(res[[valueCols[2]]]),
                    as.character(res[[valueCols[3]]]))
            } else {
                rowValues <- c(svID, "-", "-", "-")
            }
            out[idx, ] <- rowValues
        }
        # The SVID column is only bookkeeping; smapdata already carries it.
        out[, -1, drop = FALSE]
    }

    ## Extracting Overlapped Genes
    print("###OverlapGenes###")
    dataOverLap <- collectExpression(
        entries = trimws(as.character(
            annotationColumn("OverlapGenes_strand_perc"))),
        lookup = overlapLookup,
        valueCols = c("Probandexpression", "Fatherexpression",
            "Motherexpression"),
        outNames = c("OverlapProbandTPM",
            "OverlapFatherTPM", "OverlapMotherTPM"),
        showProgress = TRUE)

    ## Extracting NonOverlapped Up Stream Genes
    print("###NonOverlapUPStreamGenes###")
    datanonOverLapUP <- collectExpression(
        entries = annotationColumn("Upstream_nonOverlapGenes_dist_kb"),
        lookup = nonOverlapLookup,
        valueCols = c("ProbandTPM", "FatherTPM", "MotherTPM"),
        outNames = c("NonOverlapUPProbandTPM",
            "NonOverlapUPFatherTPM", "NonOverlapUPMotherTPM"))

    ## Extracting NonOverlapped Down Stream Genes
    print("###NonOverlapDNStreamGenes###")
    datanonOverLapDN <- collectExpression(
        entries = annotationColumn("Downstream_nonOverlapGenes_dist_kb"),
        lookup = nonOverlapLookup,
        valueCols = c("ProbandTPM", "FatherTPM", "MotherTPM"),
        outNames = c("NonOverlapDNProbandTPM",
            "NonOverlapDNFatherTPM", "NonOverlapDNMotherTPM"))

    dataFinal <- data.frame(smapdata, dataOverLap,
        datanonOverLapUP, datanonOverLapDN)
    return(dataFinal)
}
1358
+
1359
+
1360
+
1361
+
1362
+
1363
+
1364
+
1365
+