Browse code

Fixed labeling of cisbp motifs

Matthew Richards authored on 20/10/2017 20:45:38
Showing 3 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.19.17
5
-Date: 2017-10-04
4
+Version: 1.19.18
5
+Date: 2017-10-20
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 2.15.0), methods, BiocGenerics, S4Vectors, IRanges, Biostrings
9 9
Binary files a/inst/extdata/cisbp.RData and b/inst/extdata/cisbp.RData differ
... ...
@@ -1,4 +1,4 @@
1
-# MotifDb/inst/scripes/import/cispb/import.R
1
+# MotifDb/inst/scripes/import/cisbp/import.R
2 2
 #------------------------------------------------------------------------------------------------------------------------
3 3
 options (stringsAsFactors=FALSE)
4 4
 printf <- function(...) print(noquote(sprintf(...)))
... ...
@@ -82,7 +82,6 @@ readRawMatrices = function (dataDir)
82 82
     # for this example importer, that directory will be <dataDir>/cisbp
83 83
     # within which we will look for one small file "sample.pcm"
84 84
     
85
-
86 85
   filename <- file.path(dataDir, "cisbp", "sample.pcm")
87 86
   printf("checking for readable matrix file:")
88 87
   printf("     %s", filename)
... ...
@@ -137,7 +136,7 @@ translateMetadataToMotifDbStandardForm <- function(x)
137 136
   
138 137
   std <- list(providerName=x$motif_id,
139 138
               providerId=x$ma_id,
140
-              dataSource="cispb_1.02",
139
+              dataSource="cisbp_1.02",
141 140
               geneSymbol=x$TF_Name,
142 141
               geneId=NA,
143 142
               geneIdType=NA,
... ...
@@ -173,7 +172,7 @@ standardizeOrganism <- function(x)
173 172
    tokens <- strsplit(x, "_")[[1]]
174 173
 
175 174
    if(length(tokens) != 2){
176
-      warning(sprintf("cispb import could not standardize species name: '%s'", x))
175
+      warning(sprintf("cisbp import could not standardize species name: '%s'", x))
177 176
       return(x)
178 177
       }
179 178
 
... ...
@@ -236,13 +235,13 @@ createMotifDbArchiveFile <- function(dataDir, RDataFileName, count=NA)
236 235
      md.fixed <- translateMetadataToMotifDbStandardForm(md)
237 236
      tbl.md[tbl.md.row,] <- as.data.frame(md.fixed)
238 237
      } # for title
239
-          
238
+
240 239
    empties <- which(nchar(tbl.md$providerName) == 0)
241 240
    if(length(empties) > 0){
242 241
       tbl.md <- tbl.md[-empties,]
243 242
       }
244 243
    rownames(tbl.md) <- paste(tbl.md$organism, tbl.md$dataSource, tbl.md$providerName, sep="-")
245
-   matrices <- matrices[1:nrow(tbl.md)]
244
+   matrices <- matrices[-empties]
246 245
    names(matrices) <- rownames(tbl.md)
247 246
 
248 247
    printf("saving %d matrices with metadata to %s", nrow(tbl.md), file.path(getwd(), RDataFileName))
... ...
@@ -256,7 +255,7 @@ createMotifDbArchiveFile <- function(dataDir, RDataFileName, count=NA)
256 255
 #------------------------------------------------------------------------------------------------------------------------
257 256
 ## # files are named by Motif_ID, which also provides the database key used to create the metadata entries
258 257
 ## # for each motif's matrix, "M1093_1.02.txt" and "M1093_1.02"
259
-## # cispb at version 1.02 has 6559 matrices.  each of these is annotated to different organisms
258
+## # cisbp at version 1.02 has 6559 matrices.  each of these is annotated to different organisms
260 259
 ## # producing maybe 70k metadata table entries
261 260
 ## # M1093
262 261
 ## createMetadataTable = function (dataDir, motifIDs)
... ...
@@ -371,7 +370,7 @@ parsePwm = function (title, text)
371 370
    col.count <- 4
372 371
 
373 372
     # our standard form is 4 rows (one per nucleotide) and n columns
374
-     # cispb matrices come in transposed: read them as-is, then transpose
373
+     # cisbp matrices come in transposed: read them as-is, then transpose
375 374
      # to our format
376 375
 
377 376
   result <- matrix(nrow=row.count, ncol=col.count,