Browse code

motifToGene fix: now combines an empty source table with a populated table

paul-shannon authored on 23/05/2018 18:32:17
Showing 3 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.21.7
5
-Date: 2018-05-18
4
+Version: 1.23.8
5
+Date: 2018-05-23
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
... ...
@@ -431,6 +431,9 @@ setMethod ('motifToGene', 'MotifList',
431 431
       # the mcols (the metadata, the annotation) which accompanies
432 432
       # each pfm matrix
433 433
 
434
+     tbl.mdb <- data.frame()
435
+     tbl.tfc <- data.frame()
436
+
434 437
      name.map <- as.list(motifs)
435 438
      names(name.map) <- motifs
436 439
      for(i in seq_len(length(motifs))){
... ...
@@ -441,46 +444,44 @@ setMethod ('motifToGene', 'MotifList',
441 444
            motifs[i] <-newValue
442 445
             }
443 446
         } # for i
444
-     #browser()
447
+
445 448
      source <- tolower(source)
446 449
      stopifnot(all(source %in% c("motifdb", "tfclass")))
447
-     tbl.mdb <- data.frame()
448 450
      if("motifdb" %in% source){
449 451
         providerId <- NULL   # avoid R CMD check note
450 452
         tbl.mdb <- as.data.frame(subset(mcols(object), providerId %in% motifs))
451
-        if(nrow(tbl.mdb) == 0)
452
-           return(data.frame())
453
-        tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
454
-        colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
455
-        tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
456 453
         if(nrow(tbl.mdb) > 0){
454
+           tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
455
+           colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
456
+           tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
457 457
            tbl.mdb$source <- "MotifDb"
458 458
            tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "pubmedID", "organism", "source")]
459 459
            rownames(tbl.mdb) <- NULL
460
-           }
460
+           } # nrow of tbl.mdb > 0
461 461
         }  # motifDb
462
-     tbl.tfc <- data.frame()
462
+
463 463
      if("tfclass" %in% source){
464 464
         motif <- NULL
465 465
         tbl.tfc <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
466
-        if(nrow(tbl.tfc) == 0)
467
-           return(data.frame())
468
-        tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")])
469
-        tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),]
470
-        rownames(tbl.tfc) <- NULL
471
-        colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID")
472 466
         if(nrow(tbl.tfc) > 0){
467
+           tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")])
468
+           tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),]
469
+           rownames(tbl.tfc) <- NULL
470
+           colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID")
473 471
            tbl.tfc$source <- "TFClass"
474 472
            tbl.tfc$organism <- "Hsapiens"
475
-           }
476
-        }
473
+           } # nrow(tbl.tfc) > 0
474
+        } # tfclass
475
+
476
+      if(nrow(tbl.mdb) == 0 && nrow(tbl.tfc) == 0)
477
+         return(data.frame())
478
+
477 479
       tbl.out <- rbind(tbl.mdb, tbl.tfc)
478 480
       dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")]))
479 481
       if(length(dups) > 0)
480 482
          tbl.out <- tbl.out[-dups,]
481 483
       if(length(name.map) > 0)
482 484
          tbl.out$motif <- as.character(name.map[tbl.out$motif])
483
-      #browser()
484 485
       tbl.out
485 486
       })
486 487
 
... ...
@@ -953,6 +953,14 @@ test.motifToGene <- function()
953 953
    checkEquals(sort(unique(tbl$geneSymbol)),
954 954
                     c("AR", "RUNX1", "TFAP2A", "TFAP2A(var.3)", "TFAP2B", "TFAP2C", "TFAP2D", "TFAP2E"))
955 955
 
956
+      # (23 may 2018) found that MotifDb works, but c("MotifDb", "TFClass") does not
957
+      # test the fix here
958
+
959
+   motifs <- c("Hsapiens-jolma2013-IRF5-2", "Hsapiens-SwissRegulon-IRF5.SwissRegulon")
960
+   checkEquals(motifToGene(MotifDb, motifs, source=c("MotifDb"))$geneSymbol, c("IRF5", "IRF5"))
961
+   checkEquals(nrow(motifToGene(MotifDb, motifs, source=c("TFClass"))), 0)
962
+   checkEquals(motifToGene(MotifDb, motifs, source=c("MotifDb", "TFClass"))$geneSymbol, c("IRF5", "IRF5"))
963
+
956 964
    } # test.motifToGene
957 965
 #------------------------------------------------------------------------------------------------------------------------
958 966
 test.associateTranscriptionFactors <- function()