R/plotGeneTrack.R
5f77a94e
 ###############################################################################
fddf1f12
 ## plotRegiont.R -- plot a snapshot of a genomic region
a261958a
 ## 30 november 2018 Thomas Faux
fddf1f12
 ## Medical Bioinformatics Centre
5f77a94e
 ###############################################################################
fddf1f12
 
 # transform the dataframe in GRangesList
 # @param df list of dataframes containing the peaks
 # @return returns a GRangesList
a261958a
 
 toGRList <- function(df) {
     grlist <- GenomicRanges::GRangesList()
     for (i in unique(df$external_gene_name)) {
         temp <- GenomicRanges::makeGRangesFromDataFrame(df[which(df$external_gene_name == i),
             ])
         grlist[[i]] <- temp
     }
     names(grlist) <- unique(df$external_gene_name)
     return(grlist)
fddf1f12
 }
 
 # find genes overlaping with the region for the given database
 # @param region GRanges object containing the region to plot
a261958a
 # @param genome Genome used by the db object 'hg19','GRCh38' or 'mm10'
 # @param m biomaRt object @return returns GRanges with the Genes hits found in the region
 
 findGenes <- function(region, m) {
54ed3dc4
     filters <- c("chromosome_name", "start", "end")
     chr <- substr(as.character(seqnames(region)), 4,
         nchar(as.character(seqnames(region))))
     values <- list(chr, GenomicRanges::start(region),
         GenomicRanges::end(region))
     map <- biomaRt::getBM(mart = m, attributes = c("ensembl_gene_id",
                                                     "external_gene_name",
                                                     "ensembl_exon_id",
                                                     "chromosome_name",
                                                     "exon_chrom_start",
                                                     "exon_chrom_end",
                                                     "strand"),
                                                     filters = filters,
                                                     values = values)
a261958a
 
54ed3dc4
     map[which(map$strand == -1), "strand"] <- "-"
     map[which(map$strand == 1), "strand"] <- "+"
ef446a6f
     if(dim(map)[1] > 0){
       map$chromosome_name <- as.character(GenomicRanges::seqnames(region))  
     }
54ed3dc4
     gr <- GenomicRanges::makeGRangesFromDataFrame(map,
                                         keep.extra.columns = TRUE)
     return(gr)
fddf1f12
 }
 
74002fa0
 # Helper function for the two find UTR functions
 
e672ec70
 f <- function(UTR,region) {
74002fa0
   return_object <- GenomicRanges::GRanges()
   for (gene in unique(UTR$external_gene_name)) {
     temp <- UTR[which(UTR$external_gene_name == gene), ]
     temp <- GenomicRanges::makeGRangesFromDataFrame(temp, keep.extra.columns = TRUE)
     temp <- temp[queryHits(GenomicRanges::findOverlaps(temp, region, ignore.strand = TRUE))]
     if (length(temp) > 0) {
       temp <- temp[which(temp$transcript_length == max(temp$transcript_length))]
     }
     return_object <- c(return_object, temp)
   }
   return(return_object)
 }
 
fddf1f12
 # find UTR overlaping with the region for the given database
 # @param region GRanges object containing the region to plot
a261958a
 # @param m biomaRt object @return returns GRanges with the UTR hits found in the region
 
 findUTR5 <- function(region, m) {
     filters <- c("chromosome_name", "start", "end")
     chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
     values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
     map <- biomaRt::getBM(mart = m, attributes = c("external_gene_name", "chromosome_name", "strand",
         "5_utr_start", "5_utr_end", "transcript_length"), filters = filters, values = values)
 
     map[which(map$strand == -1), "strand"] <- "-"
     map[which(map$strand == 1), "strand"] <- "+"
 
     UTR5 <- map[!is.na(map$`5_utr_start`), ]
     if (dim(UTR5)[1] > 0) {
         UTR5$chromosome_name <- as.character(GenomicRanges::seqnames(region))
e672ec70
         UTR5 <- f(UTR5,region)
a261958a
 
     } else {
         UTR5 <- GenomicRanges::GRanges()
     }
     return(UTR5)
fddf1f12
 }
 
 
a261958a
 findUTR3 <- function(region, m) {
     filters <- c("chromosome_name", "start", "end")
     chr <- substr(as.character(seqnames(region)), 4, nchar(as.character(seqnames(region))))
     values <- list(chr, GenomicRanges::start(region), GenomicRanges::end(region))
     map <- biomaRt::getBM(mart = m, attributes = c("external_gene_name", "chromosome_name", "strand",
         "3_utr_start", "3_utr_end", "transcript_length"), filters = filters, values = values)
 
     map[which(map$strand == -1), "strand"] <- "-"
     map[which(map$strand == 1), "strand"] <- "+"
 
     UTR3 <- map[!is.na(map$`3_utr_start`), ]
     if (dim(UTR3)[1] > 0) {
         UTR3$chromosome_name <- as.character(GenomicRanges::seqnames(region))
e672ec70
         UTR3 <- f(UTR3,region)
a261958a
     } else {
         UTR3 <- GenomicRanges::GRanges()
fddf1f12
     }
a261958a
 
     return(UTR3)
fddf1f12
 }
 # get The biomaRt object
 # @param region GRanges object containing the region to plot
a261958a
 # @param genome Genome used by the db object 'hg19','GRCh38' or 'mm10'
fddf1f12
 # @return returns GRanges with the Genes hits found in the region
a261958a
 
 getBiomaRt <- function(region, genome = c("hg19", "GRCh38", "mm10")) {
 
74002fa0
   switch(genome, 
          hg19={
              m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                                  host = "grch37.ensembl.org", path = "/biomart/martservice",
                                  dataset = "hsapiens_gene_ensembl")
          },
          GRCh38={
              m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                                  host = "grch38.ensembl.org", path = "/biomart/martservice",
                                  dataset = "hsapiens_gene_ensembl")  
          },
          mm10={
              m <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")  
          },
          {
              m <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                                  host = "grch37.ensembl.org", path = "/biomart/martservice",
                                  dataset = "hsapiens_gene_ensembl")
          }
   )
a261958a
 
     return(m)
fddf1f12
 }
 
 
a261958a
 # plot the rectangles of the genomic track
fddf1f12
 # @param rg GRanges list of exons in the gene
 # @param gr GRanges object containing the region to plot
 # @param y numeric index for color purpose
 # @param size value to substract and add around the y value to create a rectangle
a261958a
 
 plotRectangles <- function(rg, gr, y, size) {
     graphics::rect(GenomicRanges::end(gr), rep(y, length(gr)) - size, GenomicRanges::start(gr),
         rep(y, length(gr)) + size, border = NA, col = "grey")  #y+5)
fddf1f12
 
 }
 
a261958a
 # plot the arrows between the rectangles in the genomic track
fddf1f12
 # @param x0 list of x start points
 # @param y0 list of y start points
a261958a
 # @param x1 list of x end points @param y1 list of y end points
fddf1f12
 # @param n_arr number of arrows needed
 # @param ... graphics::arrows basic parameters
a261958a
 
 arrowLine <- function(x0, y0, x1, y1, n_arr, ...) {
     x <- seq(x0, x1, length = n_arr + 1)
     y <- seq(y0, y1, length = n_arr + 1)
     graphics::arrows(x[-length(x)], y[-length(y)], x[-1], y[-1], ...)
fddf1f12
 }
 
a261958a
 # return the regions contained between the ranges of a GRanges
fddf1f12
 # @param gr GRanges object
a261958a
 # @return GRanges object
 
 inverseGRanges <- function(gr) {
     temp <- as.data.frame(gr)
     temp <- cbind(as.character(temp[seq_len(dim(temp)[1] - 1), 1]), temp[seq_len(dim(temp)[1] -
         1), 3], temp[2:dim(temp)[1], 2], as.character(temp[seq_len(dim(temp)[1] - 1), 5]))
     temp <- as.data.frame(temp)
     colnames(temp) <- c("seqnames", "start", "end", "strand")
     temp <- GRanges(temp)
     return(temp)
fddf1f12
 }
 
a261958a
 # wrapper function to plot the rectangles and arrows of the genomic track
fddf1f12
 # @param gr GRanges object containing the region to plot
 # @param region GRanges object containing the region to plot
 # @param y Numeric value used for the y position of the boxes
a261958a
 
 plotGRanges <- function(gr, region, y, UTR3, UTR5) {
     gr <- GenomicRanges::reduce(gr)
 
     if (length(gr) > 1) {
         gr_inv <- inverseGRanges(gr)
         for (j in seq_len(length(gr_inv))) {
             value <- (GenomicRanges::end(gr_inv[j]) - GenomicRanges::start(gr_inv[j]))/(GenomicRanges::end(region) -
                 GenomicRanges::start(region))
             N <- arrowNumbers(value)
             if (as.character(strand(gr_inv[j])) == "-") {
                 arrowLine(GenomicRanges::end(gr_inv[j]), y, GenomicRanges::start(gr_inv[j]), y,
64ff683a
                 n_arr = N, length = 0.1)
a261958a
             }
             if (as.character(strand(gr_inv[j])) == "+") {
                 arrowLine(GenomicRanges::start(gr_inv[j]), y, GenomicRanges::end(gr_inv[j]), y,
64ff683a
                 n_arr = N, length = 0.1)
a261958a
             }
         }
57061100
     }
a261958a
     if (length(UTR5) > 0) {
         plotRectangles(region, UTR5, y, size = 0.25)
         if (as.character(unique(strand(gr))) == "-") {
aaa97a1c
             GenomicRanges::end(
64ff683a
                 gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
                 ) <- GenomicRanges::start(
aaa97a1c
                     UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
a261958a
         }
         if (as.character(unique(strand(gr))) == "+") {
aaa97a1c
             GenomicRanges::start(
64ff683a
                 gr[queryHits(GenomicRanges::findOverlaps(gr, UTR5))]
                 ) <- GenomicRanges::end(
aaa97a1c
                     UTR5[subjectHits(GenomicRanges::findOverlaps(gr,UTR5))])
a261958a
         }
57061100
     }
a261958a
     if (length(UTR3) > 0) {
         plotRectangles(region, UTR3, y, size = 0.25)
         if (as.character(unique(strand(gr))) == "+") {
aaa97a1c
             GenomicRanges::end(
64ff683a
                 gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
                 ) <- GenomicRanges::start(
aaa97a1c
                     UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
a261958a
         }
         if (as.character(unique(strand(gr))) == "-") {
aaa97a1c
             GenomicRanges::start(
64ff683a
                 gr[queryHits(GenomicRanges::findOverlaps(gr, UTR3))]
                 ) <- GenomicRanges::end(
aaa97a1c
                     UTR3[subjectHits(GenomicRanges::findOverlaps(gr,UTR3))])
a261958a
         }
57061100
     }
a261958a
     plotRectangles(region, gr, y, size = 0.5)
57061100
 
fddf1f12
 }
 
a261958a
 arrowNumbers <- function(value) {
     N <- 0
863104d3
 
a261958a
     if (value < 0.25) {
         N = 1
     }
b8e921bf
     if ((value > 0.25) & (value <= 0.75)) {
a261958a
         N = 2
     }
b8e921bf
     if ((value > 0.75) & (value <= 1)) {
a261958a
         N = 3
     }
b8e921bf
     if ((value > 1) & (value <= 2)) {
a261958a
         N = 6
     }
     if (value > 2) {
         N = 12
     }
     return(N)
5f77a94e
 }
 
fddf1f12
 # plot the genomic track
 # @param gr GRanges object containing the region to plot
 # @param region GRanges object containing the region to plot
 
e672ec70
 plotGenomicTrack <- function(gr, UTR3, UTR5, region, cex) {
a261958a
 
     if (dim(as.data.frame(gr))[1] > 0) {
         index <- 0
         if (length(unique(gr$external_gene_name)) == 1) {
aaa97a1c
             graphics::plot(x = 1, ylim = c(0, 2),
                 xlim = c(GenomicRanges::start(region),
                 GenomicRanges::end(region)),
                 xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "",
                 xaxt = "n")
e672ec70
             graphics::axis(2, at = 1, labels = FALSE)
             graphics::text(x = par()$usr[1] - 0.03 * (par()$usr[2] - par()$usr[1]), 
                            y = 1, labels = unique(gr$external_gene_name),srt = 90, xpd = NA, font=2, cex= cex)
a261958a
             plotGRanges(gr, region, 1, UTR3, UTR5)
         }
         if (length(unique(gr$external_gene_name)) > 1) {
aaa97a1c
             graphics::plot(x = 1, ylim = c(0, length(unique(gr$external_gene_name))),
                 xlim = c(start(region), end(region)), xlab = seqnames(region),
                 yaxt = "n", ylab = "")
a261958a
             tmin <- 1
             tmax <- length(IRanges::unique(gr$external_gene_name))
             tlab <- seq(tmin, tmax) - 0.5
             lab <- IRanges::unique(gr$external_gene_name)
             graphics::axis(2, at = tlab, labels = FALSE)
             graphics::text(x = par()$usr[1] - 0.01 * (par()$usr[2] - par()$usr[1]), y = tlab,
e672ec70
                 labels = lab, srt = 45, xpd = NA, adj = 1, font=2, cex = cex)
a261958a
             for (i in unique(gr$external_gene_name)) {
                 index <- index + 1
                 gr2 <- gr[(GenomicRanges::elementMetadata(gr)[, "external_gene_name"] %in% i)]
                 UTR3_ind <- UTR3[(GenomicRanges::elementMetadata(UTR3)[, "external_gene_name"] %in%
64ff683a
                     i)]
a261958a
                 UTR5_ind <- UTR5[(GenomicRanges::elementMetadata(UTR5)[, "external_gene_name"] %in%
64ff683a
                     i)]
a261958a
                 plotGRanges(gr2, region, index - 0.5, UTR3 = UTR3_ind, UTR5 = UTR5_ind)
             }
 
         }
     } else {
ef446a6f
         message("There is no genes in the given region :", region)
         graphics::plot(x = 1, ylim = c(0, 2),
                      xlim = c(GenomicRanges::start(region),
                               GenomicRanges::end(region)),
                      xlab = GenomicRanges::seqnames(region), yaxt = "n", ylab = "",
                      xaxt = "n")
fddf1f12
     }
 
 }