Browse code

fixed plotMeth function

kamalfartiyal84 authored on 19/10/2020 10:58:00
Showing1 changed files
... ...
@@ -887,7 +887,8 @@ plotMeth <- function(grl=NULL, colors=NULL, datatype=NULL, yLim, brmeth=NULL, mc
887 887
     if(!is(org, "BSgenome"))
888 888
       stop('org has to be of class BSgenome ..')
889 889
     
890
-    gen <- org@provider_version
890
+  
891
+    gen <- metadata(org)$genome
891 892
     itrack <- IdeogramTrack(genome = gen, chromosome = chr)
892 893
     axisTrack <- GenomeAxisTrack()
893 894
     dTrack <- NULL
Browse code

setenv error fix

kamalfartiyal84 authored on 11/03/2020 15:50:16
Showing1 changed files
... ...
@@ -285,7 +285,7 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
285 285
 
286 286
                                         # splitting chromosomes in N Mb regions ..
287 287
 splitChrs <- function(chrs, org) {
288
-    if(!is(org,"BSgenome") && !is(org,"list"))
288
+    if(!is(org,"BSgenome") & !is(org,"list"))
289 289
         stop('org has to be either a list or an object of class BSgenome ..')
290 290
     chrs_org <- seqnames(org)
291 291
     if(any(!chrs %in% chrs_org))
... ...
@@ -312,13 +312,13 @@ consolidateDMRs <- function(DmrGR, pvThr=0.05, MethDiff_Thr=NULL, log2Er_Thr=NUL
312 312
         stop('DmrGR has to be of class GRanges ..')
313 313
     if(!is.numeric(pvThr))
314 314
         stop('pvThr has to be of class numeric ..')
315
-    if(!is.null(MethDiff_Thr) && !is.numeric(MethDiff_Thr))
315
+    if(!is.null(MethDiff_Thr) & !is.numeric(MethDiff_Thr))
316 316
         stop('MethDiff_Thr has to be either NULL or of class numeric ..')
317
-    if(!is.null(log2Er_Thr) && !is.numeric(log2Er_Thr))
317
+    if(!is.null(log2Er_Thr) & !is.numeric(log2Er_Thr))
318 318
       stop('log2Er_Thr has to be either NULL or of class numeric ..')
319 319
     if(!is.numeric(GAP))
320 320
         stop('GAP has to be of class numeric ..')
321
-    if(!is.null(MethDiff_Thr) && any(!type %in% c("hyper","hypo")))
321
+    if(!is.null(MethDiff_Thr) & any(!type %in% c("hyper","hypo")))
322 322
         stop('type has to be either NULL or one of hyper and hypo ..')
323 323
     if(!is.logical(correct))
324 324
         stop('correct has to be of class logical ..')
... ...
@@ -419,7 +419,7 @@ mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
419 419
         stop('Sample has to be of class BSdata ..')
420 420
     if(length(which(!(context %in% c('all','CG','CHG','CHH')))) > 0)
421 421
         stop('context has to be either all or a combination of CG, CHG, and CHH ..')
422
-    if(!is.numeric(mC) && mC < 0 )
422
+    if(!is.numeric(mC) & mC < 0 )
423 423
         stop('mC has to be of class numeric and positive..')
424 424
     if(!is.numeric(depth))
425 425
         stop('depth has to be of class numeric ..')
... ...
@@ -456,12 +456,12 @@ mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
456 456
         if(pValue < 1) indsList$p <- which(mcols(gr)$Significance > pV)
457 457
         tableInds <- table(unlist(indsList))
458 458
         inds <- names(tableInds)[tableInds == length(indsList)]
459
-        if(is.null(inds) || length(inds) == 0) return(NA)
459
+        if(is.null(inds) | length(inds) == 0) return(NA)
460 460
         return(gr[as.numeric(inds),])
461 461
     }
462 462
 
463 463
 
464
-    if(context != 'all' || mC > 1 || depth > 0 || pValue < 1) {
464
+    if(context != 'all' | mC > 1 | depth > 0 | pValue < 1) {
465 465
         NAinds <- as.numeric(which(is.na(res)))
466 466
         if(length(NAinds) > 0) {
467 467
             res[-NAinds] <- lapply(res[-NAinds], filterFun, context, mC, depth, pValue)
... ...
@@ -479,11 +479,11 @@ mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
479 479
 extractBinGRanges <- function(GenoRanges, bin, nbins) {
480 480
     if(!is(GenoRanges, "GRanges"))
481 481
         stop('GenoRanges has to be of class GRanges ..')
482
-    if(!is.numeric(bin) || length(bin) != 1 || is.na(bin))
482
+    if(!is.numeric(bin) | length(bin) != 1 | is.na(bin))
483 483
         stop(' bin has to be a single non-NA integer ')
484
-    if(!is.numeric(nbins) || length(nbins) != 1 || is.na(nbins))
484
+    if(!is.numeric(nbins) | length(nbins) != 1 | is.na(nbins))
485 485
         stop(' nbins has to be a single non-NA integer ')
486
-    if( bin < 1 || bin > nbins)
486
+    if( bin < 1 | bin > nbins)
487 487
         stop(' bin has to be between 1 and nbins ')
488 488
     binsize <- round(width(GenoRanges)/nbins)
489 489
     starts <- start(GenoRanges) + (bin-1)*binsize
... ...
@@ -505,7 +505,7 @@ mapBSdata2GRangesBin <- function(GenoRanges, Sample,
505 505
         stop('Sample has to be of class BSdata ..')
506 506
     if(length(which(!(context %in% c('all','CG','CHG','CHH')))) > 0)
507 507
         stop('context has to be either all or a combination of CG, CHG, and CHH ..')
508
-    if(!is.numeric(mC) && mC < 0 )
508
+    if(!is.numeric(mC) & mC < 0 )
509 509
         stop('mC has to be of class numeric and positive..')
510 510
     if(!is.numeric(depth))
511 511
         stop('depth has to be of class numeric ..')
... ...
@@ -652,13 +652,13 @@ profileDNAmetBin <- function(GenoRanges, Sample,
652 652
         stop('Sample has to be of class BSdata ..')
653 653
     if(length(which(!(mcCLASS %in% c('mCG','mCHG','mCHH')))) > 0)
654 654
         stop('mcCLASS has to be one of mCG, mCHG, and mCHH ..')
655
-    if(!is.numeric(mC) && mC < 0 )
655
+    if(!is.numeric(mC) & mC < 0 )
656 656
         stop('mC has to be of class numeric and positive..')
657 657
     if(!is.numeric(depthThr))
658 658
         stop('depthThr has to be of class numeric ..')
659 659
     if(!is.numeric(mCpv))
660 660
         stop('mCpv has to be of class numeric ..')
661
-    if(!is.null(minCoverage) && !is.numeric(minCoverage))
661
+    if(!is.null(minCoverage) & !is.numeric(minCoverage))
662 662
         stop('minCoverage has to be either NULL or of class numeric ..')
663 663
     if(!is.numeric(nbins))
664 664
         stop('nbins has to be of class numeric ..')
... ...
@@ -777,15 +777,15 @@ profileDNAmetBinParallel <- function(GenoRanges, Sample, mcCLASS='mCG',
777 777
         stop('Sample has to be of class BSdata ..')
778 778
     if(length(which(!(mcCLASS %in% c('mCG','mCHG','mCHH')))) > 0)
779 779
         stop('mcCLASS has to be one of mCG, mCHG, and mCHH ..')
780
-    if(!is.numeric(mC) && mC < 0 )
780
+    if(!is.numeric(mC) & mC < 0 )
781 781
         stop('mC has to be of class numeric and positive..')
782 782
     if(!is.numeric(depthThr))
783 783
         stop('depthThr has to be of class numeric ..')
784 784
     if(!is.numeric(mCpv))
785 785
         stop('mCpv has to be of class numeric ..')
786
-    if(!is.null(minCoverage) && !is.numeric(minCoverage))
786
+    if(!is.null(minCoverage) & !is.numeric(minCoverage))
787 787
         stop('minCoverage has to be either NULL or of class numeric ..')
788
-    if(!is.numeric(Nproc) && Nproc < 1)
788
+    if(!is.numeric(Nproc) & Nproc < 1)
789 789
         stop('Nproc has to be of class numeric ..')
790 790
     if(!is.numeric(nbins))
791 791
         stop('nbins has to be of class numeric ..')
... ...
@@ -827,23 +827,23 @@ profileDNAmetBinParallel <- function(GenoRanges, Sample, mcCLASS='mCG',
827 827
 
828 828
 plotMeth <- function(grl=NULL, colors=NULL, datatype=NULL, yLim, brmeth=NULL, mcContext="CG", annodata=NULL, 
829 829
                      transcriptDB, chr, start, end, org){
830
-    if(!is.null(grl) && !is(grl,'list') && !is(grl,'GElist'))
830
+    if(!is.null(grl) & !is(grl,'list') & !is(grl,'GElist'))
831 831
         stop('grl has to be either NULL or an object of class list or GElist...')
832 832
     if(is(grl,'list'))
833 833
         {
834 834
             if(any(!(sapply(grl, class) %in% c("GRanges", 'GEcollection'))))
835 835
                 stop('grl has to be a list of either GRanges or GEcollection object...')
836 836
         }
837
-    if(!is.null(brmeth) && !is(brmeth,'list'))
837
+    if(!is.null(brmeth) & !is(brmeth,'list'))
838 838
       stop('brmeth has to be either NULL or of class list ...')
839 839
     if(is(brmeth,'list'))
840 840
     {
841 841
       if(any(!(sapply(brmeth, class) %in% c("BSdata"))))
842 842
         stop('grl has to be a list of BSdata object...')
843 843
     }
844
-    if(is.null(grl) && is.null(brmeth))
844
+    if(is.null(grl) & is.null(brmeth))
845 845
       stop('both grl and brmeth cannot be NULL...')
846
-    if(!is.null(colors) && !is.character(colors))
846
+    if(!is.null(colors) & !is.character(colors))
847 847
       stop('colors has to be either NULL or of class character ...')
848 848
     
849 849
     if(!is.null(grl))
... ...
@@ -867,16 +867,16 @@ plotMeth <- function(grl=NULL, colors=NULL, datatype=NULL, yLim, brmeth=NULL, mc
867 867
             stop('datatype has to be an array containing
868 868
            one of: density, C, mC, rC, cols')
869 869
         }
870
-        if(length(datatype) != length(grl) && length(grl) != length(yLim))
870
+        if(length(datatype) != length(grl) & length(grl) != length(yLim))
871 871
           stop('grl, datatype and yLim has to be of same length ..')
872 872
       }
873 873
     }
874 874
     
875 875
     if(length(which(!(mcContext %in% c('all','CG','CHG','CHH')))) > 0)
876 876
       stop('mcContext has to be one of all, CG, CHG, and CHH ..')
877
-    if(!is.null(annodata) && !is(annodata,'GRangesList'))
877
+    if(!is.null(annodata) & !is(annodata,'GRangesList'))
878 878
       stop('annodata has to be either NULL or of class GRangesList ...')
879
-    if(!is(transcriptDB,"TxDb") && !is.null(transcriptDB))
879
+    if(!is(transcriptDB,"TxDb") & !is.null(transcriptDB))
880 880
         stop('transcriptDB has to be either NULL or an object of class TxDb ..')
881 881
     if(!is.character(chr))
882 882
         stop('chr has to be of class character ..')
Browse code

cposdensity

kamalfartiyal84 authored on 07/03/2019 16:25:39
Showing1 changed files
... ...
@@ -632,7 +632,7 @@ getCposDensity<- function(GenoRanges, Cpos, nbins) {
632 632
     Nbins <- nbins
633 633
     for(i in 1:length(Cpos)) {
634 634
         widths <- rep(round(allwidths[i]/Nbins), Nbins)
635
-        if(is.na(Cpos[[i]]))
635
+        if(all(is.na(Cpos[[i]])))
636 636
           Cpos[[i]] <- rep(0,nbins)
637 637
         else
638 638
           Cpos[[i]] <- sapply(Cpos[[i]], length)/widths
... ...
@@ -710,7 +710,7 @@ profileDNAmetBin <- function(GenoRanges, Sample,
710 710
     Cpos <- getCpos(GenoRanges= GenoRanges,
711 711
                     seqContext= sub('m','', mcCLASS),
712 712
                     org= Sample@org, nbins=Nbins)
713
-    suppressWarnings(CposD <- getCposDensity(GenoRanges, Cpos=Cpos, nbins=Nbins))
713
+    CposD <- getCposDensity(GenoRanges, Cpos=Cpos, nbins=Nbins)
714 714
     binC <- matrix(unlist(CposD), length(CposD), Nbins, byrow=T)
715 715
 
716 716
                                         # reversing for minus strand if strand is defined
Browse code

Fixed pool.reads

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@114853 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 16/03/2016 15:49:12
Showing1 changed files
... ...
@@ -134,7 +134,7 @@ pool.reads <- function(files_location)
134 134
   setwd(files_location)
135 135
   cmd <- paste("cat", paste(all_files, collapse= " "), ">", paste0(files_location, "/", "sample_merge.txt"))
136 136
   system(cmd)
137
-  python.loc <- system.file("exec", "pools_reads.py", package="methylPipe")
137
+  python.loc <- system.file("exec", "pool_reads.py", package="methylPipe")
138 138
   merge_file_loc <- paste0(files_location, "/", "sample_merge.txt")
139 139
   cmd <- paste("python", python.loc, merge_file_loc, ">",paste0(files_location, "/","sample_merge_pooled.txt"))
140 140
   system(cmd)
Browse code

Updated pool.reads

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@114830 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 16/03/2016 09:32:13
Showing1 changed files
... ...
@@ -135,7 +135,6 @@ pool.reads <- function(files_location)
135 135
   cmd <- paste("cat", paste(all_files, collapse= " "), ">", paste0(files_location, "/", "sample_merge.txt"))
136 136
   system(cmd)
137 137
   python.loc <- system.file("exec", "pools_reads.py", package="methylPipe")
138
-  #python.loc <- "/data/BA/kkishore/Collaboration_Tel_Aviv/methcall/test_merge/pools_reads.py"
139 138
   merge_file_loc <- paste0(files_location, "/", "sample_merge.txt")
140 139
   cmd <- paste("python", python.loc, merge_file_loc, ">",paste0(files_location, "/","sample_merge_pooled.txt"))
141 140
   system(cmd)
Browse code

Updated profileDNAmetBin

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@114158 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 02/03/2016 08:28:56
Showing1 changed files
... ...
@@ -746,16 +746,17 @@ profileDNAmetBin <- function(GenoRanges, Sample,
746 746
     col_names <- c(1:Nbins)
747 747
     binmC <- signif(binmC, 3)
748 748
     colnames(binmC) <- col_names
749
-    #rownames(binmC) <- NULL
749
+    rownames(binmC) <- NULL
750 750
     binC <- signif(binC, 3)
751 751
     colnames(binC) <- col_names
752
-    #rownames(binC) <- NULL
752
+    rownames(binC) <- NULL
753 753
     binrC <- signif(binrC, 3)
754 754
     colnames(binrC) <- col_names
755
-    #rownames(binrC) <- NULL
755
+    rownames(binrC) <- NULL
756 756
     binscore <- matrix(NA, nrow(binmC),
757 757
                        ncol(binmC), dimnames=list(row_names, col_names))
758
-    rownames(GenoRanges) <- row_names
758
+    rownames(binscore) <- NULL
759
+    #rownames(GenoRanges) <- row_names
759 760
                                         # saving chr results
760 761
     Object <- new("GEcollection",
761 762
                   SummarizedExperiment(assays=list(binmC=binmC,
Browse code

Updated profileDNAmetBin

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@114089 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 29/02/2016 08:03:46
Showing1 changed files
... ...
@@ -746,12 +746,16 @@ profileDNAmetBin <- function(GenoRanges, Sample,
746 746
     col_names <- c(1:Nbins)
747 747
     binmC <- signif(binmC, 3)
748 748
     colnames(binmC) <- col_names
749
+    #rownames(binmC) <- NULL
749 750
     binC <- signif(binC, 3)
750 751
     colnames(binC) <- col_names
752
+    #rownames(binC) <- NULL
751 753
     binrC <- signif(binrC, 3)
752 754
     colnames(binrC) <- col_names
755
+    #rownames(binrC) <- NULL
753 756
     binscore <- matrix(NA, nrow(binmC),
754 757
                        ncol(binmC), dimnames=list(row_names, col_names))
758
+    rownames(GenoRanges) <- row_names
755 759
                                         # saving chr results
756 760
     Object <- new("GEcollection",
757 761
                   SummarizedExperiment(assays=list(binmC=binmC,
Browse code

Updated profileDNAmetBin

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@113977 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 24/02/2016 11:31:26
Showing1 changed files
... ...
@@ -758,7 +758,7 @@ profileDNAmetBin <- function(GenoRanges, Sample,
758 758
                                            binC=binC, binrC=binrC,
759 759
                                            binscore=binscore),
760 760
                                        rowRanges=GenoRanges))
761
-    rownames(Object) <- row_names
761
+    #rownames(Object) <- row_names
762 762
     Object
763 763
 }
764 764
 
Browse code

Updated devel version

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@112867 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 25/01/2016 16:29:03
Showing1 changed files
... ...
@@ -108,7 +108,7 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
108 108
             ind <- sapply(cov, function(x)(length(runValue(x))))
109 109
             cov <- cov[which(ind > 1)]
110 110
             uncov_GR <- as(cov, "GRanges")
111
-            #uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
111
+            uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
112 112
             filename <- file.path(output_folder, paste0(sample_name[[i]],"_uncov", ".Rdata"))
113 113
             Objectout <- paste0(sample_name[[i]],"_uncov")
114 114
             assign(Objectout, uncov_GR)
... ...
@@ -124,6 +124,30 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
124 124
     message()
125 125
 }
126 126
 
127
+#### combining reads of replicates within a single group 
128
+pool.reads <- function(files_location)
129
+{
130
+  if(!is.character(files_location))
131
+    stop('files_location has to be of class character ..')
132
+  files_location <- normalizePath(files_location)
133
+  all_files <- list.files(files_location,pattern=".txt")
134
+  setwd(files_location)
135
+  cmd <- paste("cat", paste(all_files, collapse= " "), ">", paste0(files_location, "/", "sample_merge.txt"))
136
+  system(cmd)
137
+  python.loc <- system.file("exec", "pools_reads.py", package="methylPipe")
138
+  #python.loc <- "/data/BA/kkishore/Collaboration_Tel_Aviv/methcall/test_merge/pools_reads.py"
139
+  merge_file_loc <- paste0(files_location, "/", "sample_merge.txt")
140
+  cmd <- paste("python", python.loc, merge_file_loc, ">",paste0(files_location, "/","sample_merge_pooled.txt"))
141
+  system(cmd)
142
+  cmd <- paste("sort -k1,1 -k2,4n", paste0(files_location, "/", "sample_merge_pooled.txt") , ">", 
143
+               paste0(files_location, "/", "sample_merge_pooled_sort.txt"))
144
+  system(cmd)
145
+  cmd <- paste("rm","*_merge.txt")
146
+  system(cmd)
147
+  cmd <- paste("rm","*_merge_pooled.txt")
148
+  system(cmd)
149
+}
150
+
127 151
 
128 152
 tabixdata2GR <- function(x) {
129 153
     if(length(x) == 0) return(NA)
Browse code

Updated meth.call function coverage method with anamoly noticed in Athaliana

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@108594 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 17/09/2015 13:14:02
Showing1 changed files
... ...
@@ -104,7 +104,8 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
104 104
             aln_file <- bam.files[i]
105 105
             aln <- readGAlignments(aln_file)
106 106
             cov <- coverage(aln)
107
-            ind <- sapply(cov, function(x)(length(which(runValue(x)== 0))))
107
+            #ind <- sapply(cov, function(x)(length(which(runValue(x)== 0))))
108
+            ind <- sapply(cov, function(x)(length(runValue(x))))
108 109
             cov <- cov[which(ind > 1)]
109 110
             uncov_GR <- as(cov, "GRanges")
110 111
             #uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
Browse code

updated profileDNAmetBinParallel

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@103950 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 19/05/2015 16:16:49
Showing1 changed files
... ...
@@ -653,6 +653,10 @@ profileDNAmetBin <- function(GenoRanges, Sample,
653 653
     binmC <- matrix(unlist(mlsum), length(mlsum), Nbins, byrow=T)
654 654
     unmethList <- list()
655 655
     uncov <- Sample@uncov
656
+    uncov <- uncov[mcols(uncov)$score <= depthThr]
657
+    seqlengths(uncov) <- NA
658
+    seqlengths(GenoRanges) <- NA
659
+    ### missing sequencing coverage has value NA and unmethylated has value 0
656 660
     for(bin in 1:Nbins) {
657 661
       gel <- extractBinGRanges(GenoRanges=GenoRanges, bin=bin, nbins=Nbins)
658 662
       ov <- findOverlaps(gel,uncov)
... ...
@@ -663,8 +667,11 @@ profileDNAmetBin <- function(GenoRanges, Sample,
663 667
         val[ind] <- NA
664 668
         unmethList[[bin]] <- val 
665 669
       }
666
-      else 
667
-        unmethList[[bin]] <- 0
670
+      else
671
+      {
672
+        val <- rep(0,length(GenoRanges))
673
+        unmethList[[bin]] <- val
674
+      }  
668 675
     }
669 676
     
670 677
     unmethList <- matrix(unlist(unmethList), Nbins, length(GenoRanges), byrow=T)
... ...
@@ -683,6 +690,10 @@ profileDNAmetBin <- function(GenoRanges, Sample,
683 690
     binC <- matrix(unlist(CposD), length(CposD), Nbins, byrow=T)
684 691
 
685 692
                                         # reversing for minus strand if strand is defined
693
+    
694
+    if(is.null(dim(binmC)))
695
+      binmC <- matrix(binmC, nrow=1, ncol=length(binmC), byrow=T)
696
+    
686 697
     minusStrand <- which(as.character(strand(GenoRanges)) == '-')
687 698
     if(length(minusStrand) > 0) {
688 699
         binmC[minusStrand,] <- binmC[minusStrand, Nbins:1]
... ...
@@ -699,9 +710,7 @@ profileDNAmetBin <- function(GenoRanges, Sample,
699 710
     binrC[is.infinite(binrC)] <- NA
700 711
 
701 712
                                         # adding uncovered region information
702
-    maskUncovered <- Sample@uncov
703
-    seqlengths(maskUncovered) <- NA
704
-    seqlengths(GenoRanges) <- NA
713
+    maskUncovered <- uncov
705 714
     suppressWarnings(nonCoveredInds <- findOverlaps(GenoRanges, maskUncovered))
706 715
     orgInds <- queryHits(nonCoveredInds)
707 716
     binrC[orgInds,] <- NA
Browse code

Fixed cytosine counting bug

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@103572 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 08/05/2015 15:21:45
Showing1 changed files
... ...
@@ -589,7 +589,7 @@ getCposChr <- function(GenoRanges, seqContext, chrseq, nbins) {
589 589
             endPos <- binsize
590 590
             for(bin in 1:Nbins)
591 591
                 {
592
-                    ind <- which(Cpos[[i]] > startPos & Cpos[[i]] < endPos)
592
+                    ind <- which(Cpos[[i]] >= startPos & Cpos[[i]] <= endPos)
593 593
                     binList[[bin]] <- Cpos[[i]][ind]
594 594
                     startPos <- endPos+1
595 595
                     endPos <- endPos+binsize
Browse code

Updated coverage information in meth.call & findDMRs

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@103563 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 08/05/2015 10:06:51
Showing1 changed files
... ...
@@ -107,7 +107,7 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
107 107
             ind <- sapply(cov, function(x)(length(which(runValue(x)== 0))))
108 108
             cov <- cov[which(ind > 1)]
109 109
             uncov_GR <- as(cov, "GRanges")
110
-            uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
110
+            #uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
111 111
             filename <- file.path(output_folder, paste0(sample_name[[i]],"_uncov", ".Rdata"))
112 112
             Objectout <- paste0(sample_name[[i]],"_uncov")
113 113
             assign(Objectout, uncov_GR)
Browse code

Updating the development version same as release branch

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@103137 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 27/04/2015 13:50:28
Showing1 changed files
... ...
@@ -429,7 +429,7 @@ mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
429 429
         if(mC > 0) indsList$mC <- which(mcols(gr)$C > mC)
430 430
         if(depth > 0) indsList$d <- which((mcols(gr)$C+mcols(gr)$T) > depth)
431 431
         pV <- -10*log10(pValue)
432
-        if(pValue < 1) indsList$p <- which(mcols(gr)$significance > pV)
432
+        if(pValue < 1) indsList$p <- which(mcols(gr)$Significance > pV)
433 433
         tableInds <- table(unlist(indsList))
434 434
         inds <- names(tableInds)[tableInds == length(indsList)]
435 435
         if(is.null(inds) || length(inds) == 0) return(NA)
... ...
@@ -723,7 +723,7 @@ profileDNAmetBin <- function(GenoRanges, Sample,
723 723
                   SummarizedExperiment(assays=list(binmC=binmC,
724 724
                                            binC=binC, binrC=binrC,
725 725
                                            binscore=binscore),
726
-                                       rowData=GenoRanges))
726
+                                       rowRanges=GenoRanges))
727 727
     rownames(Object) <- row_names
728 728
     Object
729 729
 }
... ...
@@ -787,152 +787,138 @@ profileDNAmetBinParallel <- function(GenoRanges, Sample, mcCLASS='mCG',
787 787
 
788 788
                                   # function to plot gene locus visualization with methylation/omics data
789 789
 
790
-plotMeth <- function(grl, colors=NULL, datatype, yLim, brmeth=NULL, mcContext="CG", annodata=NULL, Datatrackname,
791
-                     transcriptDB, chr, start=NULL, end=NULL, org){
792
-    if(!is(grl,'list') && !is(grl,'GElist'))
793
-        stop('grl has to be of class list or GElist...')
790
+plotMeth <- function(grl=NULL, colors=NULL, datatype=NULL, yLim, brmeth=NULL, mcContext="CG", annodata=NULL, 
791
+                     transcriptDB, chr, start, end, org){
792
+    if(!is.null(grl) && !is(grl,'list') && !is(grl,'GElist'))
793
+        stop('grl has to be either NULL or an object of class list or GElist...')
794 794
     if(is(grl,'list'))
795 795
         {
796 796
             if(any(!(sapply(grl, class) %in% c("GRanges", 'GEcollection'))))
797
-                stop('grl has to be a list of either GRanges or GEcollection objects...')
797
+                stop('grl has to be a list of either GRanges or GEcollection object...')
798 798
         }
799
-    if(!is.null(colors) && !is.character(colors))
800
-      stop('colors has to be either NULL or of class character ...')
801
-    if(is.character(colors))
802
-    {
803
-      if(length(colors) != length(grl))
804
-        stop('length of colors has to be same as length of grl ...')
805
-    }
806
-    for(i in 1:length(datatype)) {
807
-      dti <- datatype[i]
808
-      if(!(dti %in% c('density', 'C','mC', 'rC','cols', 'gr')))
809
-        stop('datatype has to be an array containing
810
-           one of: density, C, mC, rC, cols or gr')
811
-    }
812
-    if(!is.numeric(yLim))
813
-      stop('yLim has to be of class numeric ..')
814
-    if(!is.null(brmeth) && !is(brmeth,'BSdataSet'))
815
-      stop('brmeth has to be either NULL or of class BSdataSet ...')
816
-    if(length(which(!(mcContext %in% c('CG','CHG','CHH')))) > 0)
817
-      stop('mcContext has to be one of CG, CHG, and CHH ..')
818
-    if(!is.null(annodata) && !is(annodata,'GRangesList'))
819
-      stop('annodata has to be either NULL or of class GRangesList ...')
820
-    if(!is.character(Datatrackname))
821
-      stop('Datatrackname has to be of class character ...')
822
-    if(!is.null(annodata) && !is.null(brmeth))
799
+    if(!is.null(brmeth) && !is(brmeth,'list'))
800
+      stop('brmeth has to be either NULL or of class list ...')
801
+    if(is(brmeth,'list'))
823 802
     {
824
-      if(length(Datatrackname) != length(grl)+length(annodata)+length(brmeth))
825
-        stop('Datatrackname has to be equal to length of grl, brmeth and annodata...')
826
-      grl_trackname <- Datatrackname[1:length(grl)]
827
-      brmeth_trackname <- Datatrackname[length(grl)+1:length(brmeth)]
828
-      anno_trackname <- Datatrackname[length(grl)+length(brmeth)+1:length(annodata)]
803
+      if(any(!(sapply(brmeth, class) %in% c("BSdata"))))
804
+        stop('grl has to be a list of BSdata object...')
829 805
     }
830
-    else
806
+    if(is.null(grl) && is.null(brmeth))
807
+      stop('both grl and brmeth cannot be NULL...')
808
+    if(!is.null(colors) && !is.character(colors))
809
+      stop('colors has to be either NULL or of class character ...')
810
+    
811
+    if(!is.null(grl))
831 812
     {
832
-      if(!is.null(brmeth))
833
-      {
834
-        if(length(Datatrackname) != length(grl)+length(brmeth))
835
-          stop('Datatrackname has to be equal to length of grl and brmeth')
836
-        grl_trackname <- Datatrackname[1:length(grl)]
837
-        brmeth_trackname <- Datatrackname[length(grl)+1:length(brmeth)]
838
-      }
839
-      if(!is.null(annodata))
813
+      if(is.null(names(grl)))
814
+        stop('names of grl cannot be NULL...')
815
+      if(is.null(datatype))
816
+        stop('datatype cannot be NULL when grl is defined and has to be of length equal to grl...')
817
+      if(is.character(colors))
840 818
       {
841
-        if(length(Datatrackname) != length(grl)+length(annodata))
842
-          stop('Datatrackname has to be equal to length of grl and annodata...')
843
-        grl_trackname <- Datatrackname[1:length(grl)]
844
-        anno_trackname <- Datatrackname[length(grl)+1:length(annodata)]
819
+        if(length(colors) != length(grl))
820
+          stop('length of colors has to be same as length of grl ...')
845 821
       }
846
-      if(is.null(annodata) && is.null(brmeth))
822
+      if(!is.numeric(yLim))
823
+        stop('yLim has to be of class numeric ..')
824
+      if(!is.null(datatype))
847 825
       {
848
-        if(length(Datatrackname) != length(grl))
849
-          stop('Datatrackname has to be equal to length of grl...')
850
-        grl_trackname <- Datatrackname[1:length(grl)]
826
+        for(i in 1:length(datatype)) {
827
+          dti <- datatype[i]
828
+          if(!(dti %in% c('density', 'C','mC', 'rC','cols')))
829
+            stop('datatype has to be an array containing
830
+           one of: density, C, mC, rC, cols')
831
+        }
832
+        if(length(datatype) != length(grl) && length(grl) != length(yLim))
833
+          stop('grl, datatype and yLim has to be of same length ..')
851 834
       }
852 835
     }
853
-
854
-    if(length(datatype) != length(grl) && length(grl) != length(yLim))
855
-      stop('grl, datatype and yLim has to be of same length ..')
836
+    
837
+    if(length(which(!(mcContext %in% c('all','CG','CHG','CHH')))) > 0)
838
+      stop('mcContext has to be one of all, CG, CHG, and CHH ..')
839
+    if(!is.null(annodata) && !is(annodata,'GRangesList'))
840
+      stop('annodata has to be either NULL or of class GRangesList ...')
856 841
     if(!is(transcriptDB,"TxDb") && !is.null(transcriptDB))
857 842
         stop('transcriptDB has to be either NULL or an object of class TxDb ..')
858 843
     if(!is.character(chr))
859 844
         stop('chr has to be of class character ..')
860
-    if(!is.null(start) && !is.numeric(start))
861
-      stop('start has to be of either NULL or of class numeric ..')
862
-    if(!is.null(end) && !is.numeric(end))
863
-        stop('end has to be of either NULL or of class numeric ..')
845
+    if(!is.numeric(start))
846
+      stop('start has to be of class numeric ..')
847
+    if(!is.numeric(end))
848
+        stop('end has to be of class numeric ..')
864 849
     if(!is(org, "BSgenome"))
865 850
       stop('org has to be of class BSgenome ..')
866
-
851
+    
867 852
     gen <- org@provider_version
868 853
     itrack <- IdeogramTrack(genome = gen, chromosome = chr)
869 854
     axisTrack <- GenomeAxisTrack()
870
-    matlist <- list()
871
-    dTrack <- list()
872
-    for (i in 1:length(grl))
873
-        {
874
-            if(datatype[i] == 'mC') {
875
-                refgr <- rowRanges(grl[[i]])
876
-                matlist[[i]] <- t(binmC(grl[[i]]))
877
-            }
878
-            if(datatype[i] == 'C') {
879
-                refgr <- rowRanges(grl[[i]])
880
-                matlist[[i]] <- t(binC(grl[[i]]))
881
-            }
882
-            if(datatype[i] == 'rC') {
883
-                refgr <- rowRanges(grl[[i]])
884
-                matlist[[i]] <- t(binrC(grl[[i]]))
885
-            }
886
-            if(datatype[i] == 'density') {
887
-                refgr <- rowRanges(grl[[i]])
888
-                matlist[[i]] <- t(binscore(grl[[i]]))
889
-            }
890
-            if(datatype[i] == 'cols') {
891
-                refgr <- rowRanges(grl[[i]])
892
-                matlist[[i]] <- t(as.matrix(mcols(grl[[i]])[1]))
893
-            }
894
-            if(datatype[i] == 'gr') {
895
-              refgr <- grl[[i]]
896
-              matlist[[i]] <- t(as.matrix(mcols(grl[[i]])[1]))
897
-            }
898
-            if(is.null(colors))
899
-              dTrack[[i]] <- DataTrack(range=refgr, data=matlist[[i]] , name=grl_trackname[i], chromosome=chr,
900
-                                     fill="darkgreen", ylim=c(0, yLim[i]))
901
-            else
902
-              dTrack[[i]] <- DataTrack(range=refgr, data=matlist[[i]] , name=grl_trackname[i], chromosome=chr,
903
-                                       fill=colors[i], ylim=c(0, yLim[i]))
904
-        }
905
-
906
-    if(!is.null(start) && !is.null(end))
855
+    dTrack <- NULL
856
+    brTrack <- NULL 
857
+    AnnoTrack <- NULL 
858
+    txTrack <- NULL
859
+    
860
+    if(!is.null(grl))
907 861
     {
908
-      brmeth_gr <- GRanges(chr,IRanges(start,end))
909
-      if(abs(start-end) < 50)
862
+      matlist <- list()
863
+      dTrack <- list()
864
+      for (i in 1:length(grl))
910 865
       {
911
-        strack <- SequenceTrack(org)
912
-        anntrack <- list(itrack, axisTrack, strack)
866
+        grl_trackname <- names(grl)
867
+        if(datatype[i] == 'mC') {
868
+          refgr <- rowRanges(grl[[i]])
869
+          matlist[[i]] <- t(apply(binmC(grl[[i]]),1,mean))
870
+        }
871
+        if(datatype[i] == 'C') {
872
+          refgr <- rowRanges(grl[[i]])
873
+          matlist[[i]] <- t(apply(binC(grl[[i]]),1,mean))
874
+        }
875
+        if(datatype[i] == 'rC') {
876
+          refgr <- rowRanges(grl[[i]])
877
+          matlist[[i]] <- t(apply(binrC(grl[[i]]),1,mean))
878
+        }
879
+        if(datatype[i] == 'density') {
880
+          refgr <- rowRanges(grl[[i]])
881
+          matlist[[i]] <- t(apply(binscore(grl[[i]]),1,mean))
882
+        }
883
+        if(datatype[i] == 'cols') {
884
+          refgr <- grl[[i]]
885
+          matlist[[i]] <- t(as.matrix(mcols(grl[[i]])[1]))
886
+        }
887
+        if(is.null(colors))
888
+          dTrack[[i]] <- DataTrack(range=refgr, data=matlist[[i]] , name=grl_trackname[i], chromosome=chr,
889
+                                   fill="darkgreen", ylim=c(0, yLim[i]))
890
+        else
891
+          dTrack[[i]] <- DataTrack(range=refgr, data=matlist[[i]] , name=grl_trackname[i], chromosome=chr,
892
+                                   fill=colors[i], ylim=c(0, yLim[i]))
913 893
       }
914
-      else
915
-        anntrack <- list(itrack, axisTrack)
894
+    }
895
+    
896
+    brmeth_gr <- GRanges(chr,IRanges(start,end))
897
+    if(abs(start-end) < 50){
898
+      strack <- SequenceTrack(org)
899
+      anntrack <- list(itrack, axisTrack, strack)
916 900
     }
917 901
     else
918
-    {
919
-      start <- min(start(refgr[seqnames(refgr)==chr,]))
920
-      end <- max(end(refgr[seqnames(refgr)==chr,]))
921 902
       anntrack <- list(itrack, axisTrack)
922
-      brmeth_gr <- GRanges(chr,IRanges(start,end))
923
-    }
924 903
 
925
-
926
-    brTrack <- list()
927
-    bsdat <- NULL
928
-    brlist <- list()
929
-    plotype_length <- length(grl)
904
+    if(!is.null(grl))
905
+      plotype_length <- length(grl)
906
+    else
907
+      plotype_length <- 0
930 908
     if(!is.null(brmeth))
931 909
     {
910
+      brTrack <- list()
911
+      bsdat <- NULL
912
+      brlist <- list()
913
+      if(is.null(names(brmeth)))
914
+        stop('names of brmeth cannot be NULL...')
915
+      brmeth_trackname <- names(brmeth)
932 916
       for (i in 1:length(brmeth))
933 917
       {
934 918
         bsdat[i] <- unlist(mapBSdata2GRanges(GenoRanges=brmeth_gr, Sample= brmeth[[i]],
935 919
                                       context=mcContext))
920
+        if(is.na(bsdat[i]))
921
+          stop('This genomic range does not have any base resolution methylation data for the given sequence context..')
936 922
         mc <- mcols(bsdat[[i]])$C/(mcols(bsdat[[i]])$C+ mcols(bsdat[[i]])$T)
937 923
         mc <- as.matrix(mc)
938 924
         temp_gr <- bsdat[[i]]
... ...
@@ -946,9 +932,12 @@ plotMeth <- function(grl, colors=NULL, datatype, yLim, brmeth=NULL, mcContext="C
946 932
     plottype <- rep("histogram", plotype_length)
947 933
 #### adding annotation track
948 934
 
949
-    AnnoTrack <- list()
950 935
     if(!is.null(annodata))
951 936
     {
937
+      AnnoTrack <- list()
938
+      if(is.null(names(annodata)))
939
+        stop('names of annodata cannot be NULL...')
940
+      anno_trackname <- names(annodata)
952 941
       col <- rainbow(length(annodata))
953 942
       for (i in 1:length(annodata))
954 943
       {
... ...
@@ -961,10 +950,9 @@ plotMeth <- function(grl, colors=NULL, datatype, yLim, brmeth=NULL, mcContext="C
961 950
     	txdb <- transcriptDB
962 951
     	txTrack <- GeneRegionTrack(txdb, chromosome=chr, name="Transcripts", showId=TRUE,
963 952
     	                           geneSymbol=TRUE, background.panel = "#FFFEDB", background.title = "brown")
964
-    	combtrack <- c(anntrack, dTrack, brTrack, AnnoTrack, txTrack)
965
-     }
966
-    else combtrack <- c(anntrack, dTrack, brTrack, AnnoTrack)
967
-
953
+    }
954
+    
955
+    combtrack <- c(anntrack, dTrack, brTrack, AnnoTrack, txTrack)
968 956
     plotTracks(combtrack, type=plottype, chromosome=chr, from=start, to=end,
969 957
                reverseStrand = FALSE, background.panel = "#FFFEDB", background.title = "brown", col="black")
970 958
 }
Browse code

Replaced rowData with rowRanges

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@101585 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 30/03/2015 09:54:09
Showing1 changed files
... ...
@@ -779,7 +779,7 @@ profileDNAmetBinParallel <- function(GenoRanges, Sample, mcCLASS='mCG',
779 779
     chrcomb <- NULL
780 780
     chrcomb <- do.call("rbind", clRes)
781 781
     Object <- new("GEcollection", SummarizedExperiment(assays=assays(chrcomb),
782
-                                                       rowData(chrcomb)))
782
+                                                       rowRanges(chrcomb)))
783 783
     Object
784 784
 }
785 785
 
... ...
@@ -872,23 +872,23 @@ plotMeth <- function(grl, colors=NULL, datatype, yLim, brmeth=NULL, mcContext="C
872 872
     for (i in 1:length(grl))
873 873
         {
874 874
             if(datatype[i] == 'mC') {
875
-                refgr <- rowData(grl[[i]])
875
+                refgr <- rowRanges(grl[[i]])
876 876
                 matlist[[i]] <- t(binmC(grl[[i]]))
877 877
             }
878 878
             if(datatype[i] == 'C') {
879
-                refgr <- rowData(grl[[i]])
879
+                refgr <- rowRanges(grl[[i]])
880 880
                 matlist[[i]] <- t(binC(grl[[i]]))
881 881
             }
882 882
             if(datatype[i] == 'rC') {
883
-                refgr <- rowData(grl[[i]])
883
+                refgr <- rowRanges(grl[[i]])
884 884
                 matlist[[i]] <- t(binrC(grl[[i]]))
885 885
             }
886 886
             if(datatype[i] == 'density') {
887
-                refgr <- rowData(grl[[i]])
887
+                refgr <- rowRanges(grl[[i]])
888 888
                 matlist[[i]] <- t(binscore(grl[[i]]))
889 889
             }
890 890
             if(datatype[i] == 'cols') {
891
-                refgr <- rowData(grl[[i]])
891
+                refgr <- rowRanges(grl[[i]])
892 892
                 matlist[[i]] <- t(as.matrix(mcols(grl[[i]])[1]))
893 893
             }
894 894
             if(datatype[i] == 'gr') {
Browse code

Updated development version according to release version

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@101582 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 30/03/2015 09:32:48
Showing1 changed files
... ...
@@ -86,6 +86,7 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
86 86
     }
87 87
 
88 88
     parallel::mclapply(cmd, runperl, mc.cores=Nproc, mc.preschedule=TRUE)
89
+    
89 90
     message("Creating temporary BAM files from input SAM files...")
90 91
     message()
91 92
     setwd(output_folder)
... ...
@@ -169,7 +170,7 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
169 170
         stop('tabix not found at tabixPath ..')
170 171
     if(!file.exists(paste(tabixPath, '/bgzip', sep='')))
171 172
         stop('bgzip not found at tabixPath ..')
172
-
173
+    
173 174
     files_location <- normalizePath(files_location)
174 175
     output_folder <- normalizePath(output_folder)
175 176
     tabixPath <- normalizePath(tabixPath)
... ...
@@ -189,8 +190,35 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
189 190
         pValues <-  -round(10*log10(pValues))
190 191
         return(pValues)
191 192
     }
193
+    
194
+    
195
+    ####### checking for the chromosome column ##########
192 196
     all_files <- list.files(path = files_location, pattern = ".txt")
197
+    for (i in 1:length(all_files))
198
+    {
199
+      path <- paste0(files_location,"/",all_files[[i]])
200
+      temp_data <- fread(path,nrows=10)
201
+      if(length(grep("chr",temp_data$V1))==0)
202
+      {
203
+        cmd <- paste("sed -i 's/\"//g'",path)
204
+        system(cmd)
205
+        cmd <- paste("sed -i 's/^/chr/'",path) 
206
+        system(cmd)
207
+      }
208
+    }
209
+    
210
+    ############### sorting chromosome files ############
211
+    
193 212
     sample_name <- unlist(strsplit(all_files, split=".txt"))
213
+    cmd <- paste("sort -k1,1 -k2,4n", paste0(files_location, "/", all_files) , ">", 
214
+                 paste0(files_location, "/", sample_name,"_sort.txt"))
215
+    sapply(cmd,system)
216
+    cmd <- paste("mv", paste0(files_location, "/", sample_name,"_sort.txt"), 
217
+                 paste0(files_location, "/", sample_name,".txt"))
218
+    sapply(cmd,system)
219
+    
220
+    ######################################################
221
+    
194 222
     for(i in 1:length(all_files)) {
195 223
         filecg <- all_files[i]
196 224
         message(filecg)
... ...
@@ -214,11 +242,12 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
214 242
                                         # concatenating, compressing and building tabix index
215 243
     message('postprocessing ..')
216 244
     Tabix_files <- list.files(path = files_location, pattern = "_tabix.txt")
217
-                                        #sample_name <- unlist(strsplit(Tabix_files, split="_tabix.txt"))
245
+                                        
218 246
                                         # generating the TABIX compressed file
219 247
     for(filetb in Tabix_files) {
220 248
         filetb_name <- unlist(strsplit(filetb,split=".txt"))
221
-        str <- paste('cat', paste0(files_location, "/", filetb), '>', paste0(output_folder, "/", filetb_name,"_out.txt"))
249
+        str <- paste('cat', paste0(files_location, "/", filetb), '>', 
250
+                     paste0(output_folder, "/", filetb_name,"_out.txt"))
222 251
         system(str)
223 252
         str <- paste0(tabixPath, '/bgzip ', output_folder, "/", filetb_name,"_out.txt")
224 253
         system(str)
... ...
@@ -726,7 +755,8 @@ profileDNAmetBinParallel <- function(GenoRanges, Sample, mcCLASS='mCG',
726 755
     Chrs <- unique(as.character(seqnames(GenoRanges)))
727 756
     tabixChrs <- seqnamesTabix(Sample@file)
728 757
     otherChr <- which(!(as.character(seqnames(GenoRanges)) %in% tabixChrs))
729
-    GenoRanges <- GenoRanges[-c(otherChr)]
758
+    otherChr <- which(!(as.character(seqnames(GenoRanges)) %in% tabixChrs))
759
+    if(length(otherChr) > 0) GenoRanges <- GenoRanges[-c(otherChr)]
730 760
     Chrs <- unique(as.character(seqnames(GenoRanges)))
731 761
     Nproc <- min(Nproc, length(Chrs))
732 762
     cl <- makeCluster(Nproc, 'PSOCK')
Browse code

Allfunctions.R updated

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@95311 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 10/10/2014 20:27:35
Showing1 changed files
... ...
@@ -622,6 +622,27 @@ profileDNAmetBin <- function(GenoRanges, Sample,
622 622
     }
623 623
     mlsum <- lapply(bsdat, sapply, sumFun)
624 624
     binmC <- matrix(unlist(mlsum), length(mlsum), Nbins, byrow=T)
625
+    unmethList <- list()
626
+    uncov <- Sample@uncov
627
+    for(bin in 1:Nbins) {
628
+      gel <- extractBinGRanges(GenoRanges=GenoRanges, bin=bin, nbins=Nbins)
629
+      ov <- findOverlaps(gel,uncov)
630
+      if(length(ov) > 0)
631
+      {
632
+        val <- rep(0,length(GenoRanges))
633
+        ind <- unique(queryHits(ov))
634
+        val[ind] <- NA
635
+        unmethList[[bin]] <- val 
636
+      }
637
+      else 
638
+        unmethList[[bin]] <- 0
639
+    }
640
+    
641
+    unmethList <- matrix(unlist(unmethList), Nbins, length(GenoRanges), byrow=T)
642
+    unmethList <- t(unmethList)
643
+    ind <- which(is.na(binmC))
644
+    binmC[ind] <- unmethList[ind]
645
+    
625 646
     wbp <- round(width(GenoRanges)/Nbins)
626 647
     binmC <- apply(binmC, 2, function(x) x/wbp)
627 648
 
Browse code

Allfunctions.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@93861 bc3139a8-67e5-0310-9ffc-ced21a209358

Kamal Kishore authored on 05/09/2014 13:27:51
Showing1 changed files
... ...
@@ -8,6 +8,7 @@ process.hmc <- function(file, output_folder, Coverage){
8 8
     if(!is(Coverage,"GRanges"))
9 9
         stop('Coverage has to be of class GRanges with column name coverage..')
10 10
 
11
+    output_folder <- normalizePath(output_folder)  
11 12
     sample_name <- unlist(strsplit(file, split=".txt"))
12 13
     output.files <- list()
13 14
     temp_data <- fread(file, sep="\t", header=FALSE)
... ...
@@ -45,6 +46,9 @@ meth.call <- function(files_location, output_folder, no_overlap, read.context, N
45 46
         stop('read.context has to be of class character ..')
46 47
     if(!is.numeric(Nproc))
47 48
         stop('Nproc has to be of class numeric ..')
49
+    
50
+    files_location <- normalizePath(files_location)
51
+    output_folder <- normalizePath(output_folder)
48 52
 
49 53
                                         # read.type
50 54
     if( !( read.context %in% c("CpG","All")))
... ...
@@ -166,6 +170,9 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
166 170
     if(!file.exists(paste(tabixPath, '/bgzip', sep='')))
167 171
         stop('bgzip not found at tabixPath ..')
168 172
 
173
+    files_location <- normalizePath(files_location)
174
+    output_folder <- normalizePath(output_folder)
175
+    tabixPath <- normalizePath(tabixPath)
169 176
     binomTestMulti <- function(mat, maxN=50, p=bc, bh=TRUE) {
170 177
         bmat <- matrix(NA, maxN, maxN)
171 178
         for(i in 1:maxN) {
... ...
@@ -187,20 +194,20 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
187 194
     for(i in 1:length(all_files)) {
188 195
         filecg <- all_files[i]
189 196
         message(filecg)
190
-        filemC <- paste0(files_location, filecg, '.mC')
191
-        str <- paste('cut -f5,6', paste0(files_location, filecg), '>', filemC)
197
+        filemC <- paste0(files_location, "/", filecg, '.mC')
198
+        str <- paste('cut -f5,6', paste0(files_location, "/", filecg), '>', filemC)
192 199
         system(str)
193 200
                                         # computing binomial pvalues
194
-        mCdat <- read.table(filemC, sep='\t', header=FALSE, row.names=NULL)
201
+        mCdat <- fread(filemC)
195 202
         mCdat <- as.matrix(mCdat)
196 203
         mCdat[,2] <- mCdat[,1]+ mCdat[,2]
197 204
         pV <- binomTestMulti(mCdat, p=bc)
198 205
                                         # attaching pvalues
199 206
 
200 207
         filepV <- paste0(filemC, '.pvalues')
201
-        fileTabix <- paste0(files_location, sample_name[i], '_tabix.txt')
208
+        fileTabix <- paste0(files_location, "/", sample_name[i], '_tabix.txt')
202 209
         write(pV, file=filepV, ncolumns=1)
203
-        str <- paste('paste', paste0(files_location, filecg), filepV, '>', fileTabix)
210
+        str <- paste('paste', paste0(files_location, "/", filecg), filepV, '>', fileTabix)
204 211
         system(str)
205 212
         system(paste('rm', filepV, filemC))
206 213
     }
... ...
@@ -211,12 +218,12 @@ BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
211 218
                                         # generating the TABIX compressed file
212 219
     for(filetb in Tabix_files) {
213 220
         filetb_name <- unlist(strsplit(filetb,split=".txt"))
214
-        str <- paste('cat', paste0(files_location, filetb), '>', paste0(output_folder, filetb_name,"_out.txt"))
221
+        str <- paste('cat', paste0(files_location, "/", filetb), '>', paste0(output_folder, "/", filetb_name,"_out.txt"))
215 222
         system(str)
216
-        str <- paste0(tabixPath, '/bgzip ', output_folder, filetb_name,"_out.txt")
223
+        str <- paste0(tabixPath, '/bgzip ', output_folder, "/", filetb_name,"_out.txt")
217 224
         system(str)
218 225
                                         # generating the TABIX index file
219
-        fileoutgz <- paste0(output_folder, filetb_name,"_out.txt", '.gz')
226
+        fileoutgz <- paste0(output_folder, "/", filetb_name,"_out.txt", '.gz')
220 227
         str <- paste(tabixPath, '/tabix -s 1 -b 2 -e 2 -f ', fileoutgz, sep='')
221 228
         system(str)
222 229
     }
... ...
@@ -373,6 +380,11 @@ mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
373 380
                                         # assigning NA to GRanges regions in chrs not represented
374 381
     tabixChrs <- seqnamesTabix(Sample@file)
375 382
     representedChr <- which(as.character(seqnames(GenoRanges)) %in% tabixChrs)
383
+    if(length(representedChr) == 0)
384
+      {
385
+      res <- as.list(rep(NA, length(GenoRanges))) 
386
+      return(res)
387
+      }
376 388
     otherChr <- which(!(as.character(seqnames(GenoRanges)) %in% tabixChrs))
377 389
     if(length(otherChr) > 0) res <- as.list(rep(NA, length(GenoRanges)))
378 390
 
Browse code

Add compEpiTools/ erccdashboard/ hiReadsProcessor/ MBASED/ methylPipe/ to the repos.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/methylPipe@93651 bc3139a8-67e5-0310-9ffc-ced21a209358

Marc Carlson authored on 27/08/2014 21:26:05
Showing1 changed files
1 1
new file mode 100755
... ...
@@ -0,0 +1,907 @@
1
+                              # function to determine hmC level from mlml output file
2
+
3
+process.hmc <- function(file, output_folder, Coverage){
4
+    if(!is.character(file))
5
+        stop('file has to be of class character ..')
6
+    if(!is.character(output_folder))
7
+        stop('output_folder has to be of class character ..')
8
+    if(!is(Coverage,"GRanges"))
9
+        stop('Coverage has to be of class GRanges with column name coverage..')
10
+
11
+    sample_name <- unlist(strsplit(file, split=".txt"))
12
+    output.files <- list()
13
+    temp_data <- fread(file, sep="\t", header=FALSE)
14
+    temp_data_gr <- GRanges(temp_data[,1],IRanges(temp_data[,2], temp_data[,2]))
15
+    ov <- findOverlaps(temp_data_gr, Coverage)
16
+    mcols(temp_data_gr)$coverage <- 0
17
+    ind1 <- queryHits(ov)
18
+    ind2 <- subjectHits(ov)
19
+    mcols(temp_data_gr)$coverage[ind1] <- mcols(Coverage)$coverage[ind2]
20
+    temp_data[,4] <- round((temp_data[,4]*mcols(temp_data_gr)$coverage))
21
+    temp_data[,5] <- round((temp_data[,5]*mcols(temp_data_gr)$coverage))
22
+    temp_data[,6] <- round((temp_data[,6]*mcols(temp_data_gr)$coverage))
23
+    ind <- which(temp_data[,7]==0)
24
+    temp_data <- temp_data[ind,]
25
+    temp_data_CpG <- temp_data[,c(1,2,4,6)]
26
+    temp_data_hmC <- temp_data[,c(1,2,5,6)]
27
+    filename <- file.path(output_folder,
28
+                          paste(sample_name, "_", "CpG", ".txt", sep=""))
29
+    write.table(temp_data_CpG, file=filename)
30
+    filename <- file.path(output_folder,
31
+                          paste(sample_name, "_", "hmC", ".txt", sep=""))
32
+    write.table(temp_data_hmC, file=filename)
33
+}
34
+
35
+                                        # function to determine the methylation
36
+
37
+meth.call <- function(files_location, output_folder, no_overlap, read.context, Nproc){
38
+    if(!is.character(files_location))
39
+        stop('files_location has to be of class character ..')
40
+    if(!is.character(output_folder))
41
+        stop('output_folder has to be of class character ..')
42
+    if(!is.logical(no_overlap))
43
+        stop('no_overlap has to be of class logical ..')
44
+    if(!is.character(read.context))
45
+        stop('read.context has to be of class character ..')
46
+    if(!is.numeric(Nproc))
47
+        stop('Nproc has to be of class numeric ..')
48
+
49
+                                        # read.type
50
+    if( !( read.context %in% c("CpG","All")))
51
+        stop("wrong 'read.context' argument supplied, only 'CpG' or 'All' accepted")
52
+
53
+                                        # list of input sam files
54
+    all_files <- list.files(path = files_location, pattern = ".sam")
55
+    sample_name <- unlist(strsplit(all_files, split=".sam"))
56
+
57
+                                        # list of output files
58
+    output.files <- list()
59
+    output.files[[read.context]] <- file.path(output_folder,
60
+                                              paste(sample_name, "_", read.context, ".txt", sep=""))
61
+                                        # creation of system command
62
+    read.files <- file.path(files_location, all_files)
63
+    perl.arguments <- paste("--file", read.files, "--sam_type paired_sam")
64
+    if(read.context == "CpG" )
65
+        {perl.arguments <- paste(perl.arguments, "--CpG", output.files[["CpG"]] )}
66
+    if(read.context == "All" )
67
+        {perl.arguments <- paste(perl.arguments, "--All", output.files[["All"]] )}
68
+    if(no_overlap){perl.arguments <- paste(perl.arguments, "--no_overlap" )}
69
+                                        # location of the perl script
70
+    perl.loc <- (system.file("exec", "methinfo.pl", package="methylPipe"))
71
+    cmd <- paste("perl", perl.loc, perl.arguments)
72
+                                        # then call perl to process the file
73
+    message(    )
74
+    message("Extracting methylation information from input SAM files and creating output files for each sample...")
75
+    message()
76
+    runperl <- function(x)
77
+        {
78
+            status <- try(system(x))
79
+            if(status != 0)
80
+                {stop("\nError in methylation calling...\n
81
+                Check if it is a sorted Bismark SAM file\n")}
82
+    }
83
+
84
+    parallel::mclapply(cmd, runperl, mc.cores=Nproc, mc.preschedule=TRUE)
85
+    message("Creating temporary BAM files from input SAM files...")
86
+    message()
87
+    setwd(output_folder)
88
+    system('mkdir tempBAM')
89
+    temp_folder <- paste0(output_folder,"/tempBAM")
90
+
91
+    for (i in 1:length(all_files))
92
+        asBam(file.path(files_location, all_files[i]), destination=file.path(temp_folder, sample_name[i]), overwrite=TRUE)
93
+
94
+    message("Creating uncovered regions objects for each sample from BAM files...")
95
+    message()
96
+    bam.files <- file.path(path = temp_folder, paste(sample_name, ".bam", sep=""))
97
+    for (i in 1:length(all_files))
98
+        {
99
+            aln_file <- bam.files[i]
100
+            aln <- readGAlignments(aln_file)
101
+            cov <- coverage(aln)
102
+            ind <- sapply(cov, function(x)(length(which(runValue(x)== 0))))
103
+            cov <- cov[which(ind > 1)]
104
+            uncov_GR <- as(cov, "GRanges")
105
+            uncov_GR <- uncov_GR[mcols(uncov_GR)$score== 0]
106
+            filename <- file.path(output_folder, paste0(sample_name[[i]],"_uncov", ".Rdata"))
107
+            Objectout <- paste0(sample_name[[i]],"_uncov")
108
+            assign(Objectout, uncov_GR)
109
+            save(list=Objectout, file=filename)
110
+        }
111
+    message('Removing all temporary BAM files...')
112
+    message()
113
+    str <- paste("rm -r","tempBAM")
114
+    system(str)
115
+    message('Methylation info and Uncovered regions output files created for each sample in output_folder...')
116
+    message()
117
+    message("Processing done Successfully...")
118
+    message()
119
+}
120
+
121
+
122
+tabixdata2GR <- function(x) {
123
+    if(length(x) == 0) return(NA)
124
+    temp <- strsplit(x, split="\t")
125
+    mat  <- matrix(unlist(temp), ncol=7, byrow=TRUE)
126
+    df   <- as.data.frame(mat, stringsAsFactors=FALSE)
127
+    df_gr <- GRanges(df[,1], IRanges(as.numeric(df[,2]), as.numeric(df[,2])),
128
+                     strand=df[,3], Context=df[,4], C=as.numeric(df[,5]), T=as.numeric(df[,6]), Significance=as.numeric(df[,7]))
129
+    rm(df)
130
+    rm(mat)
131
+    return(df_gr)
132
+}
133
+
134
+BSdata <- function(file, uncov, org) {
135
+    bsdata <-  new('BSdata', file=file, uncov=uncov, org=org)
136
+    return(bsdata)
137
+}
138
+
139
+
140
+BSdataSet <-  function(org, group, ...) {
141
+    Objname <- names(list(...))
142
+    if(!is.null(Objname))
143
+        new("BSdataSet", org=org, group=group, Objlist=list(...), names=names(list(...)))
144
+    else stop('define the names for the Objects in the arguments..')
145
+}
146
+
147
+
148
+GElist <- function(...)
149
+{
150
+    Objname <- names(list(...))
151
+    if(!is.null(Objname))
152
+        new("GElist", Objlist=list(...), names=names(list(...)))
153
+    else stop('define the names for the Objects in the arguments..')
154
+}
155
+
156
+
157
+BSprepare <-  function(files_location, output_folder, tabixPath, bc=1.5/100) {
158
+    if(!is.character(files_location))
159
+        stop('files_location has to be of class character ..')
160
+    if(!is.character(output_folder))
161
+        stop('output_folder has to be of class character ..')
162
+    if(!is.character(tabixPath))
163
+        stop('tabixPath has to be of class character ..')
164
+    if(!file.exists(paste(tabixPath, '/tabix', sep='')))
165
+        stop('tabix not found at tabixPath ..')
166
+    if(!file.exists(paste(tabixPath, '/bgzip', sep='')))
167
+        stop('bgzip not found at tabixPath ..')
168
+
169
+    binomTestMulti <- function(mat, maxN=50, p=bc, bh=TRUE) {
170
+        bmat <- matrix(NA, maxN, maxN)
171
+        for(i in 1:maxN) {
172
+            for(j in i:maxN) bmat[i,j] <- binom.test(x=i, n=j, p=p,
173
+                                                     alternative='greater')$p.value
174
+        }
175
+        pVfun <-  function(x) {
176
+            if(x[2] < (maxN+1)) return(bmat[x[1], x[2]])
177
+            else return(binom.test(x=x[1], n=x[2], p=p,
178
+                                   alternative='greater')$p.value)
179
+        }
180
+        pValues <-  apply(mat, 1, pVfun)
181
+        if(bh) pValues <-  p.adjust(pValues, method='BH')
182
+        pValues <-  -round(10*log10(pValues))
183
+        return(pValues)
184
+    }
185
+    all_files <- list.files(path = files_location, pattern = ".txt")
186
+    sample_name <- unlist(strsplit(all_files, split=".txt"))
187
+    for(i in 1:length(all_files)) {
188
+        filecg <- all_files[i]
189
+        message(filecg)
190
+        filemC <- paste0(files_location, filecg, '.mC')
191
+        str <- paste('cut -f5,6', paste0(files_location, filecg), '>', filemC)
192
+        system(str)
193
+                                        # computing binomial pvalues
194
+        mCdat <- read.table(filemC, sep='\t', header=FALSE, row.names=NULL)
195
+        mCdat <- as.matrix(mCdat)
196
+        mCdat[,2] <- mCdat[,1]+ mCdat[,2]
197
+        pV <- binomTestMulti(mCdat, p=bc)
198
+                                        # attaching pvalues
199
+
200
+        filepV <- paste0(filemC, '.pvalues')
201
+        fileTabix <- paste0(files_location, sample_name[i], '_tabix.txt')
202
+        write(pV, file=filepV, ncolumns=1)
203
+        str <- paste('paste', paste0(files_location, filecg), filepV, '>', fileTabix)
204
+        system(str)
205
+        system(paste('rm', filepV, filemC))
206
+    }
207
+                                        # concatenating, compressing and building tabix index
208
+    message('postprocessing ..')
209
+    Tabix_files <- list.files(path = files_location, pattern = "_tabix.txt")
210
+                                        #sample_name <- unlist(strsplit(Tabix_files, split="_tabix.txt"))
211
+                                        # generating the TABIX compressed file
212
+    for(filetb in Tabix_files) {
213
+        filetb_name <- unlist(strsplit(filetb,split=".txt"))
214
+        str <- paste('cat', paste0(files_location, filetb), '>', paste0(output_folder, filetb_name,"_out.txt"))
215
+        system(str)
216
+        str <- paste0(tabixPath, '/bgzip ', output_folder, filetb_name,"_out.txt")
217
+        system(str)
218
+                                        # generating the TABIX index file
219
+        fileoutgz <- paste0(output_folder, filetb_name,"_out.txt", '.gz')
220
+        str <- paste(tabixPath, '/tabix -s 1 -b 2 -e 2 -f ', fileoutgz, sep='')
221
+        system(str)
222
+    }
223
+}
224
+
225
+
226
+                                        # splitting chromosomes in N Mb regions ..
227
+splitChrs <- function(chrs, org) {
228
+    if(!is(org,"BSgenome") && !is(org,"list"))
229
+        stop('org has to be either a list or an object of class BSgenome ..')
230
+    chrs_org <- seqnames(org)
231
+    if(any(!chrs %in% chrs_org))
232
+        stop('This is not a valid chromosome .. ')
233
+    chrL <- seqlengths(org)[chrs]
234
+    splitDf <- data.frame(chrs, 1, chrL, row.names = NULL, stringsAsFactors=FALSE)
235
+    colnames(splitDf) <- c('chr','start','end')
236
+    return(splitDf)
237
+}
238
+
239
+                                        # fisher's method to determine a Pvalue based on a set of Pvalues
240
+chiCombP <- function(Pvalues) {
241
+    chistat <- -2*(sum(log(Pvalues)))
242
+    chiP <- 1-pchisq(chistat, df=2*length(Pvalues))
243
+    return(chiP)
244
+}
245
+
246
+
247
+                                        # consolidating DMRs ..
248
+
249
+consolidateDMRs <- function(DmrGR, pvThr=0.05, MethDiff_Thr=NULL, log2Er_Thr=NULL,
250
+                            GAP=0, type=NULL, correct=FALSE){
251
+    if(!is(DmrGR, "GRanges"))
252
+        stop('DmrGR has to be of class GRanges ..')
253
+    if(!is.numeric(pvThr))
254
+        stop('pvThr has to be of class numeric ..')
255
+    if(!is.null(MethDiff_Thr) && !is.numeric(MethDiff_Thr))
256
+        stop('MethDiff_Thr has to be either NULL or of class numeric ..')
257
+    if(!is.null(log2Er_Thr) && !is.numeric(log2Er_Thr))
258
+      stop('log2Er_Thr has to be either NULL or of class numeric ..')
259
+    if(!is.numeric(GAP))
260
+        stop('GAP has to be of class numeric ..')
261
+    if(!is.null(MethDiff_Thr) && any(!type %in% c("hyper","hypo")))
262
+        stop('type has to be either NULL or one of hyper and hypo ..')
263
+    if(!is.logical(correct))
264
+        stop('correct has to be of class logical ..')
265
+
266
+    if(correct) mcols(DmrGR)$pValue= p.adjust(mcols(DmrGR)$pValue, method='BH')
267
+    sInds <- which(mcols(DmrGR)$pValue <= pvThr)
268
+    if(!is.null(MethDiff_Thr)) {
269
+                                        # selecting based on methylation diff
270
+        mInds <- which(abs(mcols(DmrGR)$MethDiff_Perc) >= MethDiff_Thr)
271
+        sInds <- IRanges::intersect(sInds, mInds)
272
+    }
273
+    if(!is.null(log2Er_Thr)) {
274
+                                        # selecting based on log2enrichment
275
+        eInds <- which(abs(mcols(DmrGR)$log2Enrichment) >= log2Er_Thr)
276
+        sInds <- IRanges::intersect(sInds, eInds)
277
+    }
278
+    if(length(sInds) == 0) return(NULL)
279
+    DmrGR <- DmrGR[sInds,]
280
+    if(!is.null(type))
281
+    {
282
+      if(type=="hypo")
283
+        DmrGR <- DmrGR[mcols(DmrGR)$MethDiff_Perc < 0]
284
+      else
285
+        DmrGR <- DmrGR[mcols(DmrGR)$MethDiff_Perc > 0]
286
+    }
287
+
288
+                                        # building a GRanges for selected DMRs
289
+
290
+    joinDMR <- function(query, Gap=0, fun=chiCombP) {
291
+        if(!is.numeric(Gap))
292
+            stop('Gap has to be of class numeric .. ')
293
+        if(class(fun) != 'function')
294
+            stop('fun has to be of class function .. ')
295
+        commonChrs <- unique(as.character(seqnames(query)))
296
+        resIR_gr <- GRanges()
297
+        for(chrom in commonChrs) {
298
+            queryF <- query[seqnames(query) == chrom]
299
+            Pv <- mcols(queryF)$pValue
300
+            MethDiff <-  mcols(queryF)$MethDiff_Perc
301
+            Enrichment <-  mcols(queryF)$log2Enrichment
302
+                                        # extracting IRanges from the dataframe
303
+            IR1 <- IRanges(start(queryF), end(queryF))
304
+            IR2 <- IR1
305
+            if(Gap > 0) {
306
+                                        # Gap are added on each side of the genomic regions
307
+                                        # they will be taken back after the intersection
308
+                start(IR1) <- start(IR1) - Gap
309
+                end(IR1) <- end(IR1) + Gap
310
+                start(IR2) <- start(IR2) - Gap
311
+                end(IR2) <- end(IR2) + Gap
312
+            }
313
+            iIR <- IRanges::intersect(IR1, IR2)
314
+            if(length(iIR) == 0) next
315
+            if(Gap > 0) {
316
+                start(iIR) <- start(iIR) + Gap
317
+                end(iIR) <- end(iIR) - Gap
318
+            }
319
+            combPv <- rep(NA, length(iIR))
320
+            combMethDiff <- rep(NA, length(iIR))
321
+            combEnrichment <- rep(NA, length(iIR))
322
+                                        # determining which IR elements were combined in iIR
323
+            result1 <- findOverlaps(iIR, IR1)
324
+            res1 <- cbind(queryHits(result1), subjectHits(result1))
325
+            result2 <- findOverlaps(iIR, IR2)
326
+            res2 <- cbind(queryHits(result1), subjectHits(result1))
327
+            for(uind in unique(res2[,1])) {
328
+                inds <- which(res2[,1] == uind)
329
+                IR2ind <- res2[inds,2]
330
+                Pvs <- Pv[IR2ind]
331
+                Meth <- MethDiff[IR2ind]
332
+                Enr <- Enrichment[IR2ind]
333
+                combPv[uind] <- do.call('fun', list(Pvs))
334
+                combMethDiff[uind] <- do.call(mean, list(Meth))
335
+                combEnrichment[uind] <- do.call(mean, list(Enr))
336
+            }
337
+            suppressWarnings(resIR_gr <- append(resIR_gr, GRanges(Rle(chrom), IRanges(start(iIR), end(iIR)),
338
+                                                                  pValue=round(combPv,3), MethDiff_Perc=round(combMethDiff,3),
339
+                                                                  log2Enrichment=round(combEnrichment,3))))
340
+        }
341
+                                        # appending the new iIR to those generated for the other chromosomes
342
+        zeroWinds <- which(width(resIR_gr) == 1)
343
+        if(length(zeroWinds) > 0) resIR_gr <- resIR_gr[-zeroWinds,]
344
+        resIR_gr
345
+    }
346
+
347
+    dmrGRanges <- joinDMR(DmrGR, Gap=GAP)
348
+    return(dmrGRanges)
349
+}
350
+
351
+
352
+                                        # retrieve mC calls for genomic regions given a BSdata object for a sample
353
+
354
+mapBSdata2GRanges <- function(GenoRanges, Sample, context='all', mC=1, depth=0,
355
+                              pValue=1) {
356
+    if(!is(GenoRanges, "GRanges"))
357
+        stop('GenoRanges has to be of class GRanges ..')
358
+    if(!is(Sample, "BSdata"))
359
+        stop('Sample has to be of class BSdata ..')
360
+    if(length(which(!(context %in% c('all','CG','CHG','CHH')))) > 0)
361
+        stop('context has to be either all or a combination of CG, CHG, and CHH ..')
362
+    if(!is.numeric(mC) && mC < 0 )
363
+        stop('mC has to be of class numeric and positive..')
364
+    if(!is.numeric(depth))
365
+        stop('depth has to be of class numeric ..')
366
+    if(!is.numeric(pValue))
367
+        stop('pValue has to be of class numeric ..')
368
+    if(pValue > 1)
369
+        stop('pValue has to be lower than 1 ..')
370
+
371
+
372
+                                        # querying TABIX indexed files based on the tabix seqnames,
373
+                                        # assigning NA to GRanges regions in chrs not represented
374
+    tabixChrs <- seqnamesTabix(Sample@file)
375
+    representedChr <- which(as.character(seqnames(GenoRanges)) %in% tabixChrs)
376
+    otherChr <- which(!(as.character(seqnames(GenoRanges)) %in% tabixChrs))
377
+    if(length(otherChr) > 0) res <- as.list(rep(NA, length(GenoRanges)))
378
+
379
+    regions <- GenoRanges[representedChr,]
380
+    resScan <- scanTabix(Sample@file, param=regions)
381
+    resScan <- lapply(resScan, tabixdata2GR)
382
+    if(length(otherChr) > 0) res[representedChr] <- resScan
383
+    else res <- resScan
384
+                                        # filtering results
385
+    filterFun <- function(gr, context, mC, depth, pValue) {
386
+        indsList <- list()
387
+        if(context[1] != 'all') indsList$cl <- which(mcols(gr)$Context %in% context)
388
+        if(mC > 0) indsList$mC <- which(mcols(gr)$C > mC)
389
+        if(depth > 0) indsList$d <- which((mcols(gr)$C+mcols(gr)$T) > depth)
390
+        pV <- -10*log10(pValue)
391
+        if(pValue < 1) indsList$p <- which(mcols(gr)$significance > pV)
392
+        tableInds <- table(unlist(indsList))
393
+        inds <- names(tableInds)[tableInds == length(indsList)]
394
+        if(is.null(inds) || length(inds) == 0) return(NA)
395
+        return(gr[as.numeric(inds),])
396
+    }
397
+
398
+
399
+    if(context != 'all' || mC > 1 || depth > 0 || pValue < 1) {
400
+        NAinds <- as.numeric(which(is.na(res)))
401
+        if(length(NAinds) > 0) {
402
+            res[-NAinds] <- lapply(res[-NAinds], filterFun, context, mC, depth, pValue)
403
+
404
+        }
405
+        else res <- lapply(res, filterFun, context, mC, depth, pValue)
406
+    }
407
+    return(res)
408
+}
409
+
410
+
411
+                                        # for  GenomicRanges with N bins, extract the information
412
+                                        # of the Genomic ranges for a given bin
413
+
414
+extractBinGRanges <- function(GenoRanges, bin, nbins) {
415
+    if(!is(GenoRanges, "GRanges"))
416
+        stop('GenoRanges has to be of class GRanges ..')
417
+    if(!is.numeric(bin) || length(bin) != 1 || is.na(bin))
418
+        stop(' bin has to be a single non-NA integer ')
419
+    if(!is.numeric(nbins) || length(nbins) != 1 || is.na(nbins))
420
+        stop(' nbins has to be a single non-NA integer ')
421
+    if( bin < 1 || bin > nbins)
422
+        stop(' bin has to be between 1 and nbins ')
423
+    binsize <- round(width(GenoRanges)/nbins)
424
+    starts <- start(GenoRanges) + (bin-1)*binsize
425
+    gnew <- GRanges(as.character(seqnames(GenoRanges)),
426
+                    IRanges(starts, width=binsize),
427
+                    strand=as.character(strand(GenoRanges)))
428
+    return(gnew)
429
+}
430
+
431
+
432
+                                        # As in mapBSdata2GRanges, but returns mC calls
433
+                                        # within each bin of each Genomic region
434
+
435
+mapBSdata2GRangesBin <- function(GenoRanges, Sample,
436
+                                 context='all', mC=1, depth=0, pValue=1, nbins){
437
+    if(!is(GenoRanges, "GRanges"))
438
+        stop('GenoRanges has to be of class GRanges ..')
439
+    if(!is(Sample,'BSdata'))
440
+        stop('Sample has to be of class BSdata ..')
441
+    if(length(which(!(context %in% c('all','CG','CHG','CHH')))) > 0)
442
+        stop('context has to be either all or a combination of CG, CHG, and CHH ..')
443
+    if(!is.numeric(mC) && mC < 0 )
444
+        stop('mC has to be of class numeric and positive..')
445
+    if(!is.numeric(depth))
446
+        stop('depth has to be of class numeric ..')
447
+    if(!is.numeric(pValue))
448
+        stop('pValue has to be of class numeric ..')
449
+    if(pValue > 1)
450
+        stop('pValue has to be lower than 1 ..')
451
+    if(!is.numeric(nbins))
452
+        stop('nbins has to be of class numeric ..')
453
+    resList <- list()
454
+    if(nbins == 1)
455
+        resList[[1]] <- mapBSdata2GRanges(GenoRanges, Sample, context)
456
+    else
457
+        {
458
+            for(bin in 1:nbins) {
459
+                gel <- extractBinGRanges(GenoRanges=GenoRanges, bin=bin, nbins=nbins)
460
+                resList[[bin]] <- mapBSdata2GRanges(GenoRanges=gel, Sample=Sample,
461
+                                                    context, mC, depth, pValue)
462
+          }
463
+        }
464
+    resrev <- list()
465
+    for(GEind in 1:length(GenoRanges)) resrev[[GEind]] <-
466
+        lapply(resList, function(x) x[[GEind]])
467