Browse code

updated show method, renamed to makefile

paul-shannon authored on 04/03/2022 21:16:37
Showing 1 changed files
... ...
@@ -247,7 +247,7 @@ setMethod('show', 'MotifList',
247 247
       if (length (object) == 0)
248 248
         return ()
249 249
 
250
-      cat ('| Created from downloaded public sources: 2013-Aug-30', '\n', sep='')
250
+      cat ('| Created from downloaded public sources, last update: 2022-Mar-04', '\n', sep='')
251 251
 
252 252
       tbl.dataSource = as.data.frame (table (mcols (object)$dataSource))
253 253
       tbl.org = as.data.frame (table (mcols (object)$organism))
Browse code

fixed knitr error

paul-shannon authored on 23/08/2021 15:17:04
Showing 1 changed files
... ...
@@ -159,7 +159,7 @@ matrixToMemeText = function (matrices)
159 159
     s [index] = sprintf ('MOTIF %s', fixed.name)
160 160
     index = index + 1
161 161
     new.line =
162
-       sprintf ('letter-probability matrix: alength= 4 w= %d nsites= %d E=8.1e-020',
162
+       sprintf ('letter-probability matrix: alength= 4 w= %d nsites= %d E=%f',
163 163
           nrow (tfMat), 45, 8.1e-020)
164 164
     s [index] =  new.line
165 165
     index = index + 1
Browse code

removed motifmatchr and its dependencies

paul-shannon authored on 02/04/2020 20:20:03
Showing 1 changed files
... ...
@@ -4,8 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour
4 4
 setGeneric('associateTranscriptionFactors', signature='object',
5 5
            function(object, tbl.withMotifs,  source, expand.rows, motifColumnName="motifName")
6 6
               standardGeneric('associateTranscriptionFactors'))
7
-setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff,
8
-                                                      fimoDataFrameStyle=FALSE) standardGeneric('matchMotif'))
7
+#setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff,
8
+#                                                      fimoDataFrameStyle=FALSE) standardGeneric('matchMotif'))
9 9
 #------------------------------------------------------------------------------------------------------------------------
10 10
 setClass ('MotifList',
11 11
           contains='SimpleList',
... ...
@@ -64,29 +64,29 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ())
64 64
 
65 65
 } # ctor
66 66
 #-------------------------------------------------------------------------------
67
-setMethod('matchMotif', signature='MotifList',
68
-
69
-   function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){
70
-     motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix"))
71
-     motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
72
-     gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff)
73
-     result <- gr.list
74
-     if(fimoDataFrameStyle){
75
-        gr <- unlist(gr.list)
76
-        motif.names <- names(gr)
77
-        names(gr) <- NULL
78
-        tbl <- as.data.frame(gr)
79
-        tbl$motif_id <- motif.names
80
-        colnames(tbl)[1] <- "chrom"
81
-        tbl$chrom <- as.character(tbl$chrom)
82
-        colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score"
83
-        new.order <- order(tbl$start, decreasing=FALSE)
84
-        tbl <- tbl[new.order,]
85
-        result <- tbl
86
-        }
87
-     return(result)
88
-     })
89
-
67
+# setMethod('matchMotif', signature='MotifList',
68
+#
69
+#    function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){
70
+#      motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix"))
71
+#      motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
72
+#      gr.list <- motifmatchr::matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff)
73
+#      result <- gr.list
74
+#      if(fimoDataFrameStyle){
75
+#         gr <- unlist(gr.list)
76
+#         motif.names <- names(gr)
77
+#         names(gr) <- NULL
78
+#         tbl <- as.data.frame(gr)
79
+#         tbl$motif_id <- motif.names
80
+#         colnames(tbl)[1] <- "chrom"
81
+#         tbl$chrom <- as.character(tbl$chrom)
82
+#         colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score"
83
+#         new.order <- order(tbl$start, decreasing=FALSE)
84
+#         tbl <- tbl[new.order,]
85
+#         result <- tbl
86
+#         }
87
+#      return(result)
88
+#      })
89
+#
90 90
 #-------------------------------------------------------------------------------
91 91
 setMethod ('subset', signature = 'MotifList',
92 92
 
Browse code

provisional addition of hocomoco v1, with reliability scores, A-D

paul-shannon authored on 10/03/2020 00:06:57
Showing 1 changed files
... ...
@@ -4,6 +4,8 @@ setGeneric('geneToMotif', signature='object', function(object, geneSymbols, sour
4 4
 setGeneric('associateTranscriptionFactors', signature='object',
5 5
            function(object, tbl.withMotifs,  source, expand.rows, motifColumnName="motifName")
6 6
               standardGeneric('associateTranscriptionFactors'))
7
+setGeneric('matchMotif', signature='object', function(object, motifs, genomeName, regions, pval.cutoff,
8
+                                                      fimoDataFrameStyle=FALSE) standardGeneric('matchMotif'))
7 9
 #------------------------------------------------------------------------------------------------------------------------
8 10
 setClass ('MotifList',
9 11
           contains='SimpleList',
... ...
@@ -61,6 +63,30 @@ MotifList = function (matrices=list(), tbl.metadata=data.frame ())
61 63
   object
62 64
 
63 65
 } # ctor
66
+#-------------------------------------------------------------------------------
67
+setMethod('matchMotif', signature='MotifList',
68
+
69
+   function(object, motifs, genomeName, regions, pval.cutoff, fimoDataFrameStyle=FALSE){
70
+     motifs.pfmatrix <- lapply(motifs, function(motif) convert_motifs(motif, "TFBSTools-PFMatrix"))
71
+     motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
72
+     gr.list <- matchMotifs(motifs.pfmList, regions, genome=genomeName, out="positions", p.cutoff=pval.cutoff)
73
+     result <- gr.list
74
+     if(fimoDataFrameStyle){
75
+        gr <- unlist(gr.list)
76
+        motif.names <- names(gr)
77
+        names(gr) <- NULL
78
+        tbl <- as.data.frame(gr)
79
+        tbl$motif_id <- motif.names
80
+        colnames(tbl)[1] <- "chrom"
81
+        tbl$chrom <- as.character(tbl$chrom)
82
+        colnames(tbl)[grep("score", colnames(tbl))] <- "mood.score"
83
+        new.order <- order(tbl$start, decreasing=FALSE)
84
+        tbl <- tbl[new.order,]
85
+        result <- tbl
86
+        }
87
+     return(result)
88
+     })
89
+
64 90
 #-------------------------------------------------------------------------------
65 91
 setMethod ('subset', signature = 'MotifList',
66 92
 
Browse code

improved docs for query method

paul-shannon authored on 24/10/2018 19:02:56
Showing 1 changed files
... ...
@@ -307,8 +307,10 @@ setMethod ('query', 'MotifList',
307 307
 
308 308
          # get the cumulative intersection of all the "and" terms
309 309
          # this steadily dimishes the set of indices
310
-       for(indices in and.indices)
311
-         final.indices <- intersect(final.indices, indices)
310
+       for(indices in and.indices){
311
+          final.indices <- intersect(final.indices, indices)
312
+          #message(sprintf(" final.indices length is now %d", length(final.indices)))
313
+          }
312 314
 
313 315
          # lump all of the "or" terms together: they all get included
314 316
        final.indices <- intersect(unlist(or.indices), final.indices)
... ...
@@ -319,6 +321,7 @@ setMethod ('query', 'MotifList',
319 321
 
320 322
        object [final.indices]
321 323
        })
324
+
322 325
 #-------------------------------------------------------------------------------
323 326
 # Addition on 2017/06/15 from Matt Richards
324 327
 
Browse code

big speedup of associateTranscriptionFactors method, exploiting duplicate motifs

paul-shannon authored on 18/10/2018 14:18:37
Showing 1 changed files
... ...
@@ -2,7 +2,8 @@ setGeneric('query', signature='object', function(object, andStrings, orStrings=c
2 2
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
3 3
 setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
4 4
 setGeneric('associateTranscriptionFactors', signature='object',
5
-           function(object, tbl.withMotifs,  source, expand.rows) standardGeneric('associateTranscriptionFactors'))
5
+           function(object, tbl.withMotifs,  source, expand.rows, motifColumnName="motifName")
6
+              standardGeneric('associateTranscriptionFactors'))
6 7
 #------------------------------------------------------------------------------------------------------------------------
7 8
 setClass ('MotifList',
8 9
           contains='SimpleList',
... ...
@@ -529,10 +530,10 @@ setMethod ('geneToMotif', 'MotifList',
529 530
 #-------------------------------------------------------------------------------
530 531
 setMethod('associateTranscriptionFactors', 'MotifList',
531 532
 
532
-     function(object, tbl.withMotifs, source, expand.rows){
533
-        stopifnot("motifName" %in% colnames(tbl.withMotifs))
534
-        tbl.tf <- motifToGene(object, tbl.withMotifs$motifName, source)
535
-        merge(tbl.withMotifs, tbl.tf, by.x="motifName", by.y="motif", all.x=TRUE)
533
+     function(object, tbl.withMotifs, source, expand.rows, motifColumnName="motifName"){
534
+        stopifnot(motifColumnName %in% colnames(tbl.withMotifs))
535
+        tbl.tf <- motifToGene(object, unique(tbl.withMotifs[, motifColumnName]), source)
536
+        merge(tbl.withMotifs, tbl.tf, by.x=motifColumnName, by.y="motif", all.x=TRUE)
536 537
         })
537 538
 
538 539
 #-------------------------------------------------------------------------------
Browse code

motif/tf mapping now includes the 'both' option, tfclass and motifdb. motifStack plotting added

paul-shannon authored on 03/06/2018 18:31:01
Showing 1 changed files
... ...
@@ -491,7 +491,7 @@ setMethod ('geneToMotif', 'MotifList',
491 491
 
492 492
    function (object, geneSymbols, source, ignore.case=FALSE) {
493 493
      source <- tolower(source)
494
-     stopifnot(source %in% c("motifdb", "tfclass"))
494
+     stopifnot(all(source %in% c("motifdb", "tfclass")))
495 495
      extract.mdb <- function(gene){
496 496
         geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable"
497 497
         if(ignore.case)
... ...
@@ -503,13 +503,13 @@ setMethod ('geneToMotif', 'MotifList',
503 503
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
504 504
         tbl
505 505
         }
506
-     if(source %in% c("motifdb")){
506
+     if("motifdb" %in% source){
507 507
         tbls <- lapply(geneSymbols, extract.mdb)
508 508
         result <- do.call(rbind, tbls)
509 509
         if(nrow(result) > 0)
510 510
            result$source <- "MotifDb"
511 511
         }
512
-     if(source %in% c("tfclass")){
512
+     if("tfclass" %in% source){
513 513
         if(ignore.case)
514 514
            tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols))
515 515
         else
Browse code

motifToGene fix: now combined empty source table with populated table

paul-shannon authored on 23/05/2018 18:32:17
Showing 1 changed files
... ...
@@ -431,6 +431,9 @@ setMethod ('motifToGene', 'MotifList',
431 431
       # the mcols (the metadata, the annotation) which accompanies
432 432
       # each pfm matrix
433 433
 
434
+     tbl.mdb <- data.frame()
435
+     tbl.tfc <- data.frame()
436
+
434 437
      name.map <- as.list(motifs)
435 438
      names(name.map) <- motifs
436 439
      for(i in seq_len(length(motifs))){
... ...
@@ -441,46 +444,44 @@ setMethod ('motifToGene', 'MotifList',
441 444
            motifs[i] <-newValue
442 445
             }
443 446
         } # for i
444
-     #browser()
447
+
445 448
      source <- tolower(source)
446 449
      stopifnot(all(source %in% c("motifdb", "tfclass")))
447
-     tbl.mdb <- data.frame()
448 450
      if("motifdb" %in% source){
449 451
         providerId <- NULL   # avoid R CMD check note
450 452
         tbl.mdb <- as.data.frame(subset(mcols(object), providerId %in% motifs))
451
-        if(nrow(tbl.mdb) == 0)
452
-           return(data.frame())
453
-        tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
454
-        colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
455
-        tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
456 453
         if(nrow(tbl.mdb) > 0){
454
+           tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
455
+           colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
456
+           tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
457 457
            tbl.mdb$source <- "MotifDb"
458 458
            tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "pubmedID", "organism", "source")]
459 459
            rownames(tbl.mdb) <- NULL
460
-           }
460
+           } # nrow of tbl.mdb > 0
461 461
         }  # motifDb
462
-     tbl.tfc <- data.frame()
462
+
463 463
      if("tfclass" %in% source){
464 464
         motif <- NULL
465 465
         tbl.tfc <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
466
-        if(nrow(tbl.tfc) == 0)
467
-           return(data.frame())
468
-        tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")])
469
-        tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),]
470
-        rownames(tbl.tfc) <- NULL
471
-        colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID")
472 466
         if(nrow(tbl.tfc) > 0){
467
+           tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")])
468
+           tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),]
469
+           rownames(tbl.tfc) <- NULL
470
+           colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID")
473 471
            tbl.tfc$source <- "TFClass"
474 472
            tbl.tfc$organism <- "Hsapiens"
475
-           }
476
-        }
473
+           } # nrow(tbl.tfc) > 0
474
+        } # tfclass
475
+
476
+      if(nrow(tbl.mdb) == 0 && nrow(tbl.tfc) == 0)
477
+         return(data.frame())
478
+
477 479
       tbl.out <- rbind(tbl.mdb, tbl.tfc)
478 480
       dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")]))
479 481
       if(length(dups) > 0)
480 482
          tbl.out <- tbl.out[-dups,]
481 483
       if(length(name.map) > 0)
482 484
          tbl.out$motif <- as.character(name.map[tbl.out$motif])
483
-      #browser()
484 485
       tbl.out
485 486
       })
486 487
 
Browse code

associateTranscriptionFactors greatly simplified, builds upon motifToGene

paul-shannon authored on 18/05/2018 23:22:10
Showing 1 changed files
... ...
@@ -475,6 +475,9 @@ setMethod ('motifToGene', 'MotifList',
475 475
            }
476 476
         }
477 477
       tbl.out <- rbind(tbl.mdb, tbl.tfc)
478
+      dups <- which(duplicated(tbl.out[, c("motif", "geneSymbol", "organism", "source")]))
479
+      if(length(dups) > 0)
480
+         tbl.out <- tbl.out[-dups,]
478 481
       if(length(name.map) > 0)
479 482
          tbl.out$motif <- as.character(name.map[tbl.out$motif])
480 483
       #browser()
... ...
@@ -525,53 +528,62 @@ setMethod ('geneToMotif', 'MotifList',
525 528
 #-------------------------------------------------------------------------------
526 529
 setMethod('associateTranscriptionFactors', 'MotifList',
527 530
 
528
-   function(object, tbl.withMotifs, source, expand.rows){
529
-     source <- tolower(source)
530
-     stopifnot(source %in% c("motifdb", "tfclass"))
531
-     tbl.out <- data.frame()
532
-     if(source %in% c("motifdb")){
533
-           # lookup up in the object metadata, expect one TF geneSymbol per matrix name
534
-        pfm.ids <- tbl.withMotifs[, "motifName"]
535
-        matched.rows <- match(pfm.ids, names(as.list(object)))
536
-        #if(length(matched.rows) == nrow(tbl.withMotifs)) {
537
-        tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")]
538
-        tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA
539
-        tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA
540
-        tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new))
541
-        } # direct
542
-     if(source %in% c("tfclass")){
543
-        if(! "shortMotif" %in% colnames(tbl.withMotifs)){
544
-           stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source")
545
-           }
546
-        tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE)
547
-        motif.ids <- tbl.withMotifs[, "shortMotif"]
548
-        geneSymbols <- lapply(motif.ids, function(id)
549
-                                 paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";"))
550
-        geneSymbols <- unlist(geneSymbols)
551
-        pubmedIds   <- lapply(motif.ids, function(id)
552
-                                 unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)]))
553
-        pubmedIds   <- as.character(pubmedIds)
554
-        pubmedIds   <- gsub("integer(0)", "", pubmedIds, fixed=TRUE)
555
-        tbl.new     <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE)
556
-        tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA
557
-        tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA
558
-        tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new))
559
-
560
-        if(expand.rows){
561
-           rows.with.na <- which(is.na(tbl.out$geneSymbol))
562
-           rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na)
563
-           tbl.asIs <- tbl.out[rows.with.na,]
564
-           tbl.toExpand <- tbl.out[rows.with.geneSymbol,]
565
-           geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";")
566
-           counts <- unlist(lapply(geneSymbols.split, length))
567
-           geneSymbols.split.vec <- unlist(geneSymbols.split)
568
-           tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE)
569
-           stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded))
570
-           tbl.expanded$geneSymbol <- geneSymbols.split.vec
571
-           tbl.out <- rbind(tbl.expanded, tbl.asIs)
572
-           }
573
-        } # indirect
574
-     tbl.out
575
-     })
531
+     function(object, tbl.withMotifs, source, expand.rows){
532
+        stopifnot("motifName" %in% colnames(tbl.withMotifs))
533
+        tbl.tf <- motifToGene(object, tbl.withMotifs$motifName, source)
534
+        merge(tbl.withMotifs, tbl.tf, by.x="motifName", by.y="motif", all.x=TRUE)
535
+        })
576 536
 
577 537
 #-------------------------------------------------------------------------------
538
+# setMethod('associateTranscriptionFactors', 'MotifList',
539
+#
540
+#    function(object, tbl.withMotifs, source, expand.rows){
541
+#      source <- tolower(source)
542
+#      stopifnot(source %in% c("motifdb", "tfclass"))
543
+#      tbl.out <- data.frame()
544
+#      if(source %in% c("motifdb")){
545
+#            # lookup up in the object metadata, expect one TF geneSymbol per matrix name
546
+#         pfm.ids <- tbl.withMotifs[, "motifName"]
547
+#         matched.rows <- match(pfm.ids, names(as.list(object)))
548
+#         #if(length(matched.rows) == nrow(tbl.withMotifs)) {
549
+#         tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")]
550
+#         tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA
551
+#         tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA
552
+#         tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new))
553
+#         } # direct
554
+#      if(source %in% c("tfclass")){
555
+#         if(! "shortMotif" %in% colnames(tbl.withMotifs)){
556
+#            stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source")
557
+#            }
558
+#         tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE)
559
+#         motif.ids <- tbl.withMotifs[, "shortMotif"]
560
+#         geneSymbols <- lapply(motif.ids, function(id)
561
+#                                  paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";"))
562
+#         geneSymbols <- unlist(geneSymbols)
563
+#         pubmedIds   <- lapply(motif.ids, function(id)
564
+#                                  unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)]))
565
+#         pubmedIds   <- as.character(pubmedIds)
566
+#         pubmedIds   <- gsub("integer(0)", "", pubmedIds, fixed=TRUE)
567
+#         tbl.new     <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE)
568
+#         tbl.new$geneSymbol[nchar(tbl.new$geneSymbol)==0] <- NA
569
+#         tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA
570
+#         tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new))
571
+#
572
+#         if(expand.rows){
573
+#            rows.with.na <- which(is.na(tbl.out$geneSymbol))
574
+#            rows.with.geneSymbol <- setdiff(1:nrow(tbl.out), rows.with.na)
575
+#            tbl.asIs <- tbl.out[rows.with.na,]
576
+#            tbl.toExpand <- tbl.out[rows.with.geneSymbol,]
577
+#            geneSymbols.split <- strsplit(tbl.toExpand$geneSymbol, ";")
578
+#            counts <- unlist(lapply(geneSymbols.split, length))
579
+#            geneSymbols.split.vec <- unlist(geneSymbols.split)
580
+#            tbl.expanded <- splitstackshape::expandRows(tbl.toExpand, counts, count.is.col=FALSE, drop=FALSE)
581
+#            stopifnot(length(geneSymbols.split.vec) == nrow(tbl.expanded))
582
+#            tbl.expanded$geneSymbol <- geneSymbols.split.vec
583
+#            tbl.out <- rbind(tbl.expanded, tbl.asIs)
584
+#            }
585
+#         } # indirect
586
+#      tbl.out
587
+#      })
588
+#
589
+#-------------------------------------------------------------------------------
Browse code

motifToGene refactored, now suuports c('motifDb', tfClass') sources

paul-shannon authored on 18/05/2018 22:05:19
Showing 1 changed files
... ...
@@ -1,5 +1,4 @@
1
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query'))
2
-setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2'))
1
+setGeneric('query', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query'))
3 2
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4 3
 setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
5 4
 setGeneric('associateTranscriptionFactors', signature='object',
... ...
@@ -271,17 +270,17 @@ setMethod('show', 'MotifList',
271 270
       }
272 271
     })
273 272
 #-------------------------------------------------------------------------------
274
-setMethod ('query', 'MotifList',
275
-
276
-   function (object, queryString, ignore.case=TRUE) {
277
-       indices = unique (as.integer (unlist (sapply (colnames (mcols (object)),
278
-                    function (colname)
279
-                       grep (queryString, mcols (object)[, colname],
280
-                             ignore.case=ignore.case)))))
281
-        object [indices]
282
-      })
273
+#setMethod ('query', 'MotifList',
274
+#
275
+#   function (object, queryString, ignore.case=TRUE) {
276
+#       indices = unique (as.integer (unlist (sapply (colnames (mcols (object)),
277
+#                    function (colname)
278
+#                       grep (queryString, mcols (object)[, colname],
279
+#                             ignore.case=ignore.case)))))
280
+#        object [indices]
281
+#      })
283 282
 #-------------------------------------------------------------------------------
284
-setMethod ('query2', 'MotifList',
283
+setMethod ('query', 'MotifList',
285 284
 
286 285
    function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) {
287 286
       find.indices <- function(queryString)
... ...
@@ -384,37 +383,103 @@ matrixToJasparText <- function (matrices)
384 383
 } # matrixToJasparText
385 384
 #-------------------------------------------------------------------------------
386 385
 # returns a data.frame with motif, geneSymbol, source, pubmedID columns
386
+# setMethod ('oldMotifToGene', 'MotifList',
387
+#
388
+#    function (object, motifs, source) {
389
+#      source <- tolower(source)
390
+#      stopifnot(source %in% c("motifdb", "tfclass"))
391
+#      tbl <- data.frame()
392
+#      if(source %in% c("motifdb")){
393
+#         providerId <- NULL   # avoid R CMD check note
394
+#         tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
395
+#         if(nrow(tbl) == 0)
396
+#            return(data.frame())
397
+#         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
398
+#         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
399
+#         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
400
+#         if(nrow(tbl) > 0)
401
+#            tbl$source <- "MotifDb"
402
+#         }
403
+#      if(source %in% c("tfclass")){
404
+#         motif <- NULL
405
+#         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
406
+#         if(nrow(tbl) == 0)
407
+#            return(data.frame())
408
+#         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
409
+#         tbl <- tbl[order(tbl$motif),]
410
+#         rownames(tbl) <- NULL
411
+#         colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
412
+#         if(nrow(tbl) > 0)
413
+#            tbl$source <- "TFClass"
414
+#         }
415
+#      tbl
416
+#      }) # oldMotifToGene
417
+#
418
+#-------------------------------------------------------------------------------
419
+# returns a data.frame with motif, geneSymbol, source, pubmedID columns
387 420
 setMethod ('motifToGene', 'MotifList',
388 421
 
389 422
    function (object, motifs, source) {
423
+      # for MotifDb, motif names come in a variety of forms, and our first step
424
+      # is to convert them all, if needed, into that which is found in
425
+      # the "providerId" column of the MotifDb metadata table.
426
+      #
427
+      # first check to see if the supplied motif name is actually a MotifDb
428
+      # matrix list name, e.g., Hsapiens-HOCOMOCOv10-IKZF1_HUMAN.H10MO.C
429
+      # when those cases are discovered, they are translated to the matrices
430
+      # providerId - which is our standard currency for lookup, using
431
+      # the mcols (the metadata, the annotation) which accompanies
432
+      # each pfm matrix
433
+
434
+     name.map <- as.list(motifs)
435
+     names(name.map) <- motifs
436
+     for(i in seq_len(length(motifs))){
437
+        x <-match(motifs[i], names(MotifDb));
438
+        if(!is.na(x)){
439
+           newValue <-  mcols(MotifDb[motifs[i]])$providerId
440
+           names(name.map)[i] <- newValue
441
+           motifs[i] <-newValue
442
+            }
443
+        } # for i
444
+     #browser()
390 445
      source <- tolower(source)
391
-     stopifnot(source %in% c("motifdb", "tfclass"))
392
-     tbl <- data.frame()
393
-     if(source %in% c("motifdb")){
446
+     stopifnot(all(source %in% c("motifdb", "tfclass")))
447
+     tbl.mdb <- data.frame()
448
+     if("motifdb" %in% source){
394 449
         providerId <- NULL   # avoid R CMD check note
395
-        tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
396
-        if(nrow(tbl) == 0)
450
+        tbl.mdb <- as.data.frame(subset(mcols(object), providerId %in% motifs))
451
+        if(nrow(tbl.mdb) == 0)
397 452
            return(data.frame())
398
-        tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
399
-        colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
400
-        tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
401
-        if(nrow(tbl) > 0)
402
-           tbl$source <- "MotifDb"
403
-        }
404
-     if(source %in% c("tfclass")){
453
+        tbl.mdb <- unique(tbl.mdb [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
454
+        colnames(tbl.mdb) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
455
+        tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
456
+        if(nrow(tbl.mdb) > 0){
457
+           tbl.mdb$source <- "MotifDb"
458
+           tbl.mdb <- tbl.mdb[, c("motif", "geneSymbol", "pubmedID", "organism", "source")]
459
+           rownames(tbl.mdb) <- NULL
460
+           }
461
+        }  # motifDb
462
+     tbl.tfc <- data.frame()
463
+     if("tfclass" %in% source){
405 464
         motif <- NULL
406
-        tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
407
-        if(nrow(tbl) == 0)
465
+        tbl.tfc <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
466
+        if(nrow(tbl.tfc) == 0)
408 467
            return(data.frame())
409
-        tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
410
-        tbl <- tbl[order(tbl$motif),]
411
-        rownames(tbl) <- NULL
412
-        colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
413
-        if(nrow(tbl) > 0)
414
-           tbl$source <- "TFClass"
468
+        tbl.tfc <- unique(tbl.tfc[, c("motif", "tf.gene", "pubmedID")])
469
+        tbl.tfc <- tbl.tfc[order(tbl.tfc$motif),]
470
+        rownames(tbl.tfc) <- NULL
471
+        colnames(tbl.tfc) <- c("motif", "geneSymbol", "pubmedID")
472
+        if(nrow(tbl.tfc) > 0){
473
+           tbl.tfc$source <- "TFClass"
474
+           tbl.tfc$organism <- "Hsapiens"
475
+           }
415 476
         }
416
-     tbl
417
-     })
477
+      tbl.out <- rbind(tbl.mdb, tbl.tfc)
478
+      if(length(name.map) > 0)
479
+         tbl.out$motif <- as.character(name.map[tbl.out$motif])
480
+      #browser()
481
+      tbl.out
482
+      })
418 483
 
419 484
 #-------------------------------------------------------------------------------
420 485
 # returns a data.frame with motif, geneSymbol, source, pubmedID columns
Browse code

added query2(mdb, andTerms, orTerms=c(), notTerms=c())

paul-shannon authored on 10/05/2018 23:52:12
Showing 1 changed files
... ...
@@ -1,5 +1,5 @@
1
-setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE)
2
-              standardGeneric ('query'))
1
+setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE) standardGeneric ('query'))
2
+setGeneric('query2', signature='object', function(object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) standardGeneric ('query2'))
3 3
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4 4
 setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
5 5
 setGeneric('associateTranscriptionFactors', signature='object',
... ...
@@ -281,6 +281,45 @@ setMethod ('query', 'MotifList',
281 281
         object [indices]
282 282
       })
283 283
 #-------------------------------------------------------------------------------
284
+setMethod ('query2', 'MotifList',
285
+
286
+   function (object, andStrings, orStrings=c(), notStrings=c(), ignore.case=TRUE) {
287
+      find.indices <- function(queryString)
288
+         {unique(as.integer(unlist(sapply(colnames(mcols(object)),
289
+                 function(colname) grep(queryString, mcols(object)[,colname],ignore.case=ignore.case)))))
290
+             }
291
+          # setup defaults
292
+       and.indices <- list(seq_len(length(object)))
293
+       or.indices <- list(seq_len(length(object)))
294
+       not.indices <- list(c())
295
+
296
+       if(length(andStrings) > 0)
297
+          and.indices <- lapply(andStrings, find.indices)
298
+
299
+       if(length(orStrings) > 0)
300
+          or.indices <- lapply(orStrings, find.indices)
301
+
302
+       if(length(notStrings) > 0)
303
+          not.indices <- lapply(notStrings, find.indices)
304
+
305
+          # start with the indices of all elements
306
+       final.indices <- seq_len(length(object))
307
+
308
+         # get the cumulative intersection of all the "and" terms
309
+         # this steadily dimishes the set of indices
310
+       for(indices in and.indices)
311
+         final.indices <- intersect(final.indices, indices)
312
+
313
+         # lump all of the "or" terms together: they all get included
314
+       final.indices <- intersect(unlist(or.indices), final.indices)
315
+
316
+         # finally reduce the set to exclude all indices of all "not" terms
317
+       for(indices in not.indices)
318
+         final.indices <- setdiff(final.indices, indices)
319
+
320
+       object [final.indices]
321
+       })
322
+#-------------------------------------------------------------------------------
284 323
 # Addition on 2017/06/15 from Matt Richards
285 324
 
286 325
 # This will not exactly match JASPAR because units are PFM and JASPAR uses PCM
Browse code

motifToGene and geneToMotif now guard against emtpy tables, ignore.cae option added

paul-shannon authored on 02/02/2018 00:04:51
Showing 1 changed files
... ...
@@ -1,7 +1,7 @@
1 1
 setGeneric('query', signature='object', function(object, queryString, ignore.case=TRUE)
2 2
               standardGeneric ('query'))
3 3
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4
-setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif'))
4
+setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source, ignore.case=FALSE) standardGeneric('geneToMotif'))
5 5
 setGeneric('associateTranscriptionFactors', signature='object',
6 6
            function(object, tbl.withMotifs,  source, expand.rows) standardGeneric('associateTranscriptionFactors'))
7 7
 #------------------------------------------------------------------------------------------------------------------------
... ...
@@ -359,7 +359,8 @@ setMethod ('motifToGene', 'MotifList',
359 359
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
360 360
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
361 361
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
362
-        tbl$source <- "MotifDb"
362
+        if(nrow(tbl) > 0)
363
+           tbl$source <- "MotifDb"
363 364
         }
364 365
      if(source %in% c("tfclass")){
365 366
         motif <- NULL
... ...
@@ -370,7 +371,8 @@ setMethod ('motifToGene', 'MotifList',
370 371
         tbl <- tbl[order(tbl$motif),]
371 372
         rownames(tbl) <- NULL
372 373
         colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
373
-        tbl$source <- "TFClass"
374
+        if(nrow(tbl) > 0)
375
+           tbl$source <- "TFClass"
374 376
         }
375 377
      tbl
376 378
      })
... ...
@@ -379,12 +381,16 @@ setMethod ('motifToGene', 'MotifList',
379 381
 # returns a data.frame with motif, geneSymbol, source, pubmedID columns
380 382
 setMethod ('geneToMotif', 'MotifList',
381 383
 
382
-   function (object, geneSymbols, source) {
384
+   function (object, geneSymbols, source, ignore.case=FALSE) {
383 385
      source <- tolower(source)
384 386
      stopifnot(source %in% c("motifdb", "tfclass"))
385 387
      extract.mdb <- function(gene){
386 388
         geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable"
387
-        tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
389
+        if(ignore.case)
390
+           tbl <- as.data.frame(subset(mcols(object), tolower(geneSymbol) == tolower(gene)))
391
+        else
392
+           tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
393
+
388 394
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
389 395
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
390 396
         tbl
... ...
@@ -392,17 +398,22 @@ setMethod ('geneToMotif', 'MotifList',
392 398
      if(source %in% c("motifdb")){
393 399
         tbls <- lapply(geneSymbols, extract.mdb)
394 400
         result <- do.call(rbind, tbls)
395
-        result$source <- "MotifDb"
401
+        if(nrow(result) > 0)
402
+           result$source <- "MotifDb"
396 403
         }
397 404
      if(source %in% c("tfclass")){
398
-        tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
405
+        if(ignore.case)
406
+           tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tolower(tf.gene) %in% tolower(geneSymbols))
407
+        else
408
+           tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
399 409
         tf.gene <- NULL; motif <- NULL  # workaround R CMD CHECK "no visible binding ..." bogus error
400 410
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
401 411
         tbl <- tbl[order(tbl$tf.gene),]
402 412
         rownames(tbl) <- NULL
403 413
         colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
404 414
         result <- tbl[, c("geneSymbol", "motif", "pubmedID")]
405
-        result$source <- "TFClass"
415
+        if(nrow(result) > 0)
416
+           result$source <- "TFClass"
406 417
         }
407 418
      result
408 419
      })
Browse code

documentation for geneToMotif, motifToGene, associateT...

paul-shannon authored on 04/10/2017 19:49:25
Showing 1 changed files
... ...
@@ -352,23 +352,25 @@ setMethod ('motifToGene', 'MotifList',
352 352
      stopifnot(source %in% c("motifdb", "tfclass"))
353 353
      tbl <- data.frame()
354 354
      if(source %in% c("motifdb")){
355
+        providerId <- NULL   # avoid R CMD check note
355 356
         tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
356 357
         if(nrow(tbl) == 0)
357 358
            return(data.frame())
358 359
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
359 360
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
360 361
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
361
-        tbl$from <- "MotifDb"
362
+        tbl$source <- "MotifDb"
362 363
         }
363 364
      if(source %in% c("tfclass")){
365
+        motif <- NULL
364 366
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
365 367
         if(nrow(tbl) == 0)
366 368
            return(data.frame())
367 369
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
368 370
         tbl <- tbl[order(tbl$motif),]
369 371
         rownames(tbl) <- NULL
370
-        colnames(tbl) <- c("motif", "gene", "pubmedID")
371
-        tbl$from <- "TFClass"
372
+        colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
373
+        tbl$source <- "TFClass"
372 374
         }
373 375
      tbl
374 376
      })
... ...
@@ -380,8 +382,8 @@ setMethod ('geneToMotif', 'MotifList',
380 382
    function (object, geneSymbols, source) {
381 383
      source <- tolower(source)
382 384
      stopifnot(source %in% c("motifdb", "tfclass"))
383
-     #browser()
384 385
      extract.mdb <- function(gene){
386
+        geneSymbol <- NULL # workaround the R CMD check "no visible binding for global variable"
385 387
         tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
386 388
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
387 389
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
... ...
@@ -390,16 +392,17 @@ setMethod ('geneToMotif', 'MotifList',
390 392
      if(source %in% c("motifdb")){
391 393
         tbls <- lapply(geneSymbols, extract.mdb)
392 394
         result <- do.call(rbind, tbls)
393
-        result$from <- "MotifDb"
395
+        result$source <- "MotifDb"
394 396
         }
395 397
      if(source %in% c("tfclass")){
396 398
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
399
+        tf.gene <- NULL; motif <- NULL  # workaround R CMD CHECK "no visible binding ..." bogus error
397 400
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
398 401
         tbl <- tbl[order(tbl$tf.gene),]
399 402
         rownames(tbl) <- NULL
400
-        colnames(tbl) <- c("motif", "gene", "pubmedID")
401
-        result <- tbl[, c("gene", "motif", "pubmedID")]
402
-        result$from <- "TFClass"
403
+        colnames(tbl) <- c("motif", "geneSymbol", "pubmedID")
404
+        result <- tbl[, c("geneSymbol", "motif", "pubmedID")]
405
+        result$source <- "TFClass"
403 406
         }
404 407
      result
405 408
      })
Browse code

motifToGene sidesteps regexes by grep fixed=TRUE

paul-shannon authored on 03/10/2017 18:13:41
Showing 1 changed files
... ...
@@ -353,14 +353,17 @@ setMethod ('motifToGene', 'MotifList',
353 353
      tbl <- data.frame()
354 354
      if(source %in% c("motifdb")){
355 355
         tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
356
+        if(nrow(tbl) == 0)
357
+           return(data.frame())
356 358
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
357 359
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
358
-        tbl
359 360
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
360 361
         tbl$from <- "MotifDb"
361 362
         }
362 363
      if(source %in% c("tfclass")){
363 364
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
365
+        if(nrow(tbl) == 0)
366
+           return(data.frame())
364 367
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
365 368
         tbl <- tbl[order(tbl$motif),]
366 369
         rownames(tbl) <- NULL
... ...
@@ -424,9 +427,11 @@ setMethod('associateTranscriptionFactors', 'MotifList',
424 427
            }
425 428
         tbl.tfClass <- read.table(system.file(package="MotifDb", "extdata", "tfClass.tsv"), sep="\t", as.is=TRUE, header=TRUE)
426 429
         motif.ids <- tbl.withMotifs[, "shortMotif"]
427
-        geneSymbols <- lapply(motif.ids, function(id) paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif)], collapse=";"))
430
+        geneSymbols <- lapply(motif.ids, function(id)
431
+                                 paste(tbl.tfClass$tf.gene[grep(id, tbl.tfClass$motif, fixed=TRUE)], collapse=";"))
428 432
         geneSymbols <- unlist(geneSymbols)
429
-        pubmedIds   <- lapply(motif.ids, function(id) unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif)]))
433
+        pubmedIds   <- lapply(motif.ids, function(id)
434
+                                 unique(tbl.tfClass$pubmedID[grep(id, tbl.tfClass$motif, fixed=TRUE)]))
430 435
         pubmedIds   <- as.character(pubmedIds)
431 436
         pubmedIds   <- gsub("integer(0)", "", pubmedIds, fixed=TRUE)
432 437
         tbl.new     <- data.frame(geneSymbol=geneSymbols, pubmedID=pubmedIds, stringsAsFactors=FALSE)
Browse code

motif/TF mapping sources now case insensitive

paul-shannon authored on 26/09/2017 20:09:39
Showing 1 changed files
... ...
@@ -348,9 +348,10 @@ matrixToJasparText <- function (matrices)
348 348
 setMethod ('motifToGene', 'MotifList',
349 349
 
350 350
    function (object, motifs, source) {
351
-     stopifnot(source %in% c("MotifDb", "TFClass"))
351
+     source <- tolower(source)
352
+     stopifnot(source %in% c("motifdb", "tfclass"))
352 353
      tbl <- data.frame()
353
-     if(source %in% c("MotifDb")){
354
+     if(source %in% c("motifdb")){
354 355
         tbl <- as.data.frame(subset(mcols(object), providerId %in% motifs))
355 356
         tbl <- unique(tbl [, c("geneSymbol", "providerId", "dataSource", "organism", "pubmedID")])
356 357
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
... ...
@@ -358,7 +359,7 @@ setMethod ('motifToGene', 'MotifList',
358 359
         tbl <- tbl[, c("motif", "geneSymbol", "dataSource", "organism", "pubmedID")]
359 360
         tbl$from <- "MotifDb"
360 361
         }
361
-     if(source %in% c("TFClass")){
362
+     if(source %in% c("tfclass")){
362 363
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, motif %in% motifs)
363 364
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
364 365
         tbl <- tbl[order(tbl$motif),]
... ...
@@ -374,7 +375,8 @@ setMethod ('motifToGene', 'MotifList',
374 375
 setMethod ('geneToMotif', 'MotifList',
375 376
 
376 377
    function (object, geneSymbols, source) {
377
-     stopifnot(source %in% c("MotifDb", "TFClass"))
378
+     source <- tolower(source)
379
+     stopifnot(source %in% c("motifdb", "tfclass"))
378 380
      #browser()
379 381
      extract.mdb <- function(gene){
380 382
         tbl <- as.data.frame(subset(mcols(object), geneSymbol == gene))
... ...
@@ -382,12 +384,12 @@ setMethod ('geneToMotif', 'MotifList',
382 384
         colnames(tbl) <- c("geneSymbol", "motif", "dataSource", "organism", "pubmedID")
383 385
         tbl
384 386
         }
385
-     if(source %in% c("MotifDb")){
387
+     if(source %in% c("motifdb")){
386 388
         tbls <- lapply(geneSymbols, extract.mdb)
387 389
         result <- do.call(rbind, tbls)
388 390
         result$from <- "MotifDb"
389 391
         }
390
-     if(source %in% c("TFClass")){
392
+     if(source %in% c("tfclass")){
391 393
         tbl <- subset(object@manuallyCuratedGeneMotifAssociationTable, tf.gene %in% geneSymbols)
392 394
         tbl <- unique(tbl[, c("motif", "tf.gene", "pubmedID")])
393 395
         tbl <- tbl[order(tbl$tf.gene),]
... ...
@@ -403,9 +405,10 @@ setMethod ('geneToMotif', 'MotifList',
403 405
 setMethod('associateTranscriptionFactors', 'MotifList',
404 406
 
405 407
    function(object, tbl.withMotifs, source, expand.rows){
406
-     stopifnot(source %in% c("MotifDb", "TFClass"))
408
+     source <- tolower(source)
409
+     stopifnot(source %in% c("motifdb", "tfclass"))
407 410
      tbl.out <- data.frame()
408
-     if(source %in% c("MotifDb")){
411
+     if(source %in% c("motifdb")){
409 412
            # lookup up in the object metadata, expect one TF geneSymbol per matrix name
410 413
         pfm.ids <- tbl.withMotifs[, "motifName"]
411 414
         matched.rows <- match(pfm.ids, names(as.list(object)))
... ...
@@ -415,7 +418,7 @@ setMethod('associateTranscriptionFactors', 'MotifList',
415 418
         tbl.new$pubmedID[nchar(tbl.new$pubmedID)==0] <- NA
416 419
         tbl.out <- as.data.frame(cbind(tbl.withMotifs, tbl.new))
417 420
         } # direct
418
-     if(source %in% c("TFClass")){
421
+     if(source %in% c("tfclass")){
419 422
         if(! "shortMotif" %in% colnames(tbl.withMotifs)){
420 423
            stop("MotifDb::assoicateTranscriptionFactors needs a 'shortMotif' column with the TFClass source")
421 424
            }
Browse code

Fixed jaspar format spacing and updated DESCRIPTION

Matthew Richards authored on 18/09/2017 18:31:33
Showing 1 changed files
... ...
@@ -335,7 +335,10 @@ matrixToJasparText <- function (matrices)
335 335
       s[index] <- ""
336 336
       index <- index + 1
337 337
 
338
-    } # for name
338
+  } # for name
339
+
340
+  # Remove the last blank line
341
+  s <- s[-length(s)]
339 342
 
340 343
   invisible (s)
341 344
 
Browse code

now annotates to TFs properly for MotifDb and TFClass sources

paul-shannon authored on 12/09/2017 20:20:46
Showing 1 changed files
... ...
@@ -3,8 +3,7 @@ setGeneric('query', signature='object', function(object, queryString, ignore.cas
3 3
 setGeneric('motifToGene', signature='object', function(object, motifs, source) standardGeneric('motifToGene'))
4 4
 setGeneric('geneToMotif', signature='object', function(object, geneSymbols, source) standardGeneric('geneToMotif'))
5 5
 setGeneric('associateTranscriptionFactors', signature='object',
6
-           function(object, tbl.withMotifs, motif.column.name, source, expand.rows)
7
-              standardGeneric('associateTranscriptionFactors'))
6
+           function(object, tbl.withMotifs,  source, expand.rows) standardGeneric('associateTranscriptionFactors'))
8 7
 #------------------------------------------------------------------------------------------------------------------------
9 8
 setClass ('MotifList',
10 9
           contains='SimpleList',
... ...
@@ -400,23 +399,25 @@ setMethod ('geneToMotif', 'MotifList',
400 399
 #-------------------------------------------------------------------------------
401 400
 setMethod('associateTranscriptionFactors', 'MotifList',
402 401
 
403
-   function(object, tbl.withMotifs, motif.column.name, source, expand.rows){
402
+   function(object, tbl.withMotifs, source, expand.rows){
404 403
      stopifnot(source %in% c("MotifDb", "TFClass"))
405 404
      tbl.out <- data.frame()
406 405
      if(source %in% c("MotifDb")){
407
-        # "direct" means:  lookup up in the object metadata, expect one TF geneSymbol per matrix name
408
-        pfm.ids <- tbl.withMotifs[, motif.column.name]
406
+           # lookup up in the object metadata, expect one TF geneSymbol per matrix name
407
+        pfm.ids <- tbl.withMotifs[, "motifName"]
409 408
         matched.rows <- match(pfm.ids, names(as.list(object)))
410 409
         #if(length(matched.rows) == nrow(tbl.withMotifs)) {
411 410
         tbl.new <- mcols(object)[matched.rows, c("geneSymbol", "pubmedID")]