Browse code

Non-core HOCOMOCOv11 motifs, formerly tagged 'full', are now tagged 'secondary'

paul-shannon authored on 18/03/2020 19:22:54
Showing 5 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: MotifDb
2 2
 Type: Package
3 3
 Title: An Annotated Collection of Protein-DNA Binding Sequence Motifs
4
-Version: 1.29.3
5
-Date: 2020-03-16
4
+Version: 1.29.4
5
+Date: 2020-03-18
6 6
 Author: Paul Shannon, Matt Richards
7 7
 Maintainer: Paul Shannon <pshannon@systemsbiology.org>
8 8
 Depends: R (>= 3.5.0), methods, BiocGenerics, S4Vectors, IRanges, GenomicRanges, Biostrings
9 9
Binary files a/inst/extdata/hocomoco11.RData and b/inst/extdata/hocomoco11.RData differ
... ...
@@ -1196,19 +1196,19 @@ test.hocomoco11.with.reliabilityScores <- function()
1196 1196
    checkEquals(length(query(MotifDb, "hocomocov10")), 1066)
1197 1197
    checkEquals(length(query(MotifDb, "hocomocov11")), 768)
1198 1198
    checkEquals(length(query(MotifDb, "hocomocov11-core")), 400)
1199
-   checkEquals(length(query(MotifDb, "hocomocov11-full")), 368)
1199
+   checkEquals(length(query(MotifDb, "hocomocov11-secondary")), 368)
1200 1200
 
1201 1201
    checkEquals(length(query(MotifDb, "hocomocov11-core-A")), 181)
1202
-   checkEquals(length(query(MotifDb, "hocomocov11-full-A")), 46)
1202
+   checkEquals(length(query(MotifDb, "hocomocov11-secondary-A")), 46)
1203 1203
 
1204 1204
    checkEquals(length(query(MotifDb, "hocomocov11-core-B")), 84)
1205
-   checkEquals(length(query(MotifDb, "hocomocov11-full-B")), 19)
1205
+   checkEquals(length(query(MotifDb, "hocomocov11-secondary-B")), 19)
1206 1206
 
1207 1207
    checkEquals(length(query(MotifDb, "hocomocov11-core-C")), 135)
1208
-   checkEquals(length(query(MotifDb, "hocomocov11-full-C")), 13)
1208
+   checkEquals(length(query(MotifDb, "hocomocov11-secondary-C")), 13)
1209 1209
 
1210 1210
    checkEquals(length(query(MotifDb, "hocomocov11-core-D")), 0)
1211
-   checkEquals(length(query(MotifDb, "hocomocov11-full-D")), 290)
1211
+   checkEquals(length(query(MotifDb, "hocomocov11-secondary-D")), 290)
1212 1212
 
1213 1213
 } # test.hocomoco11.with.reliabilityScores
1214 1214
 #------------------------------------------------------------------------------------------------------------------------
1215 1215
Binary files a/vignettes/MotifDb-egr1.pdf and b/vignettes/MotifDb-egr1.pdf differ
... ...
@@ -86,20 +86,24 @@ There are  more than two thousand  matrices, from five sources:
86 86
 > length (MotifDb)
87 87
 \end{Sinput}
88 88
 \begin{Soutput}
89
-[1] 10333
89
+[1] 10701
90 90
 \end{Soutput}
91 91
 \begin{Sinput}
92 92
 > sort (table (values (MotifDb)$dataSource), decreasing=TRUE)
93 93
 \end{Sinput}
94 94
 \begin{Soutput}
95
-     jaspar2018      jaspar2016     HOCOMOCOv10      cisbp_1.02       jolma2013 
96
-           1564            1209            1066             874             843 
97
-   SwissRegulon         stamlab FlyFactorSurvey     JASPAR_2014     JASPAR_CORE 
98
-            684             683             614             592             459 
99
-           hPDI        UniPROBE           HOMER          ScerTF    HOCOMOCOv11A 
100
-            437             380             332             196             181 
101
-   HOCOMOCOv11C    HOCOMOCOv11B 
102
-            135              84 
95
+        jaspar2018         jaspar2016        HOCOMOCOv10         cisbp_1.02 
96
+              1564               1209               1066                874 
97
+         jolma2013       SwissRegulon            stamlab    FlyFactorSurvey 
98
+               843                684                683                614 
99
+       JASPAR_2014        JASPAR_CORE               hPDI           UniPROBE 
100
+               592                459                437                380 
101
+             HOMER HOCOMOCOv11-full-D             ScerTF HOCOMOCOv11-core-A 
102
+               332                290                196                181 
103
+HOCOMOCOv11-core-C HOCOMOCOv11-core-B HOCOMOCOv11-full-A HOCOMOCOv11-full-B 
104
+               135                 84                 46                 19 
105
+HOCOMOCOv11-full-C 
106
+                13 
103 107
 \end{Soutput}
104 108
 \end{Schunk}
105 109
 And 22 organisms (though the majority of the matrices come from just four):
... ...
@@ -109,7 +113,7 @@ And 22 organisms (though the majority of the matrices come from just four):
109 113
 \end{Sinput}
110 114
 \begin{Soutput}
111 115
                                                                      Hsapiens 
112
-                                                                         5016 
116
+                                                                         5384 
113 117
                                                                     Mmusculus 
114 118
                                                                          1411 
115 119
                                                                 Dmelanogaster 
... ...
@@ -257,14 +261,18 @@ all of the human matrices:
257 261
 > query (MotifDb, 'hsapiens')
258 262
 \end{Sinput}
259 263
 \begin{Soutput}
260
-MotifDb object of length 5031
264
+MotifDb object of length 5399
261 265
 | Created from downloaded public sources: 2013-Aug-30
262
-| 5031 position frequency matrices from 14 sources:
266
+| 5399 position frequency matrices from 18 sources:
263 267
 |         cisbp_1.02:  313
264 268
 |        HOCOMOCOv10:  640
265
-|       HOCOMOCOv11A:  181
266
-|       HOCOMOCOv11B:   84
267
-|       HOCOMOCOv11C:  135
269
+| HOCOMOCOv11-core-A:  181
270
+| HOCOMOCOv11-core-B:   84
271
+| HOCOMOCOv11-core-C:  135
272
+| HOCOMOCOv11-full-A:   46
273
+| HOCOMOCOv11-full-B:   19
274
+| HOCOMOCOv11-full-C:   13
275
+| HOCOMOCOv11-full-D:  290
268 276
 |               hPDI:  437
269 277
 |        JASPAR_2014:  117
270 278
 |        JASPAR_CORE:   66
... ...
@@ -275,7 +283,7 @@ MotifDb object of length 5031
275 283
 |       SwissRegulon:  684
276 284
 |           UniPROBE:    2
277 285
 | 8 organism/s
278
-|           Hsapiens: 5016
286
+|           Hsapiens: 5384
279 287
 | Mmusculus;Rnorvegicus;Hsapiens:    6
280 288
 | Mmusculus;Hsapiens:    3
281 289
 | Mmusculus;Rnorvegicus;Xlaevis;Stropicalis;Ggallus;Hsapiens;Btaurus;Ocuniculus:    2
... ...
@@ -301,14 +309,17 @@ If you want all matrices associated with \textbf{\emph{Sox}} transcription facto
301 309
 > query (MotifDb, 'sox')
302 310
 \end{Sinput}
303 311
 \begin{Soutput}
304
-MotifDb object of length 184
312
+MotifDb object of length 196
305 313
 | Created from downloaded public sources: 2013-Aug-30
306
-| 184 position frequency matrices from 14 sources:
314
+| 196 position frequency matrices from 17 sources:
307 315
 |    FlyFactorSurvey:    2
308 316
 |        HOCOMOCOv10:   25
309
-|       HOCOMOCOv11A:    1
310
-|       HOCOMOCOv11B:    5
311
-|       HOCOMOCOv11C:    2
317
+| HOCOMOCOv11-core-A:    1
318
+| HOCOMOCOv11-core-B:    5
319
+| HOCOMOCOv11-core-C:    2
320
+| HOCOMOCOv11-full-A:    2
321
+| HOCOMOCOv11-full-B:    1
322
+| HOCOMOCOv11-full-D:    9
312 323
 |              HOMER:    9
313 324
 |               hPDI:    2
314 325
 |        JASPAR_2014:    8
... ...
@@ -319,7 +330,7 @@ MotifDb object of length 184
319 330
 |       SwissRegulon:   19
320 331
 |           UniPROBE:   15
321 332
 | 7 organism/s
322
-|           Hsapiens:  103
333
+|           Hsapiens:  115
323 334
 |          Mmusculus:   67
324 335
 |      Dmelanogaster:    2
325 336
 | Mmusculus;Rnorvegicus;Hsapiens:    1
... ...
@@ -392,18 +403,22 @@ illustrate:
392 403
 \end{Sinput}
393 404
 \begin{Soutput}
394 405
  [1] "HOX-related factors{3.1.1}: CDX (Caudal type homeobox){3.1.1.9}"          
395
- [2] "TALE-type homeo domain factors{3.1.4}: MEIS{3.1.4.2}"                     
396
- [3] "Paired domain only{3.2.2}: PAX-2-like factors (partial homeobox){3.2.2.2}"
397
- [4] "Paired plus homeo domain{3.2.1}: PAX-4/6{3.2.1.2}"                        
398
- [5] "TALE-type homeo domain factors{3.1.4}: PBX{3.1.4.4}"                      
399
- [6] "TALE-type homeo domain factors{3.1.4}: PKNOX{3.1.4.5}"                    
400
- [7] "TALE-type homeo domain factors{3.1.4}: TGIF{3.1.4.6}"                     
401
- [8] "Homeo"                                                                    
402
- [9] "Homeo::Nuclear Factor I-CCAAT-binding"                                    
403
-[10] "Homeodomain"                                                              
404
-[11] "Paired plus homeo domain"                                                 
405
-[12] "TALE-type homeo domain factors"                                           
406
-[13] "homeodomain"                                                              
406
+ [2] "HOX-related factors{3.1.1}: GBX (Gastrulation brain homeobox){3.1.1.11}"  
407
+ [3] "TALE-type homeo domain factors{3.1.4}: IRX (Iroquois){3.1.4.1}"           
408
+ [4] "TALE-type homeo domain factors{3.1.4}: MEIS{3.1.4.2}"                     
409
+ [5] "Paired domain only{3.2.2}: PAX-1/9 (no homeo remnant){3.2.2.1}"           
410
+ [6] "Paired domain only{3.2.2}: PAX-2-like factors (partial homeobox){3.2.2.2}"
411
+ [7] "Paired plus homeo domain{3.2.1}: PAX-3/7{3.2.1.1}"                        
412
+ [8] "Paired plus homeo domain{3.2.1}: PAX-4/6{3.2.1.2}"                        
413
+ [9] "TALE-type homeo domain factors{3.1.4}: PBX{3.1.4.4}"                      
414
+[10] "TALE-type homeo domain factors{3.1.4}: PKNOX{3.1.4.5}"                    
415
+[11] "TALE-type homeo domain factors{3.1.4}: TGIF{3.1.4.6}"                     
416
+[12] "Homeo"                                                                    
417
+[13] "Homeo::Nuclear Factor I-CCAAT-binding"                                    
418
+[14] "Homeodomain"                                                              
419
+[15] "Paired plus homeo domain"                                                 
420
+[16] "TALE-type homeo domain factors"                                           
421
+[17] "homeodomain"                                                              
407 422
 \end{Soutput}
408 423
 \end{Schunk}
409 424
 \subsection{grep}
... ...
@@ -490,7 +505,7 @@ MotifDb object of length 17
490 505
 | Created from downloaded public sources: 2013-Aug-30
491 506
 | 17 position frequency matrices from 10 sources:
492 507
 |        HOCOMOCOv10:    3
493
-|       HOCOMOCOv11A:    1
508
+| HOCOMOCOv11-core-A:    1
494 509
 |              HOMER:    1
495 510
 |        JASPAR_2014:    1
496 511
 |        JASPAR_CORE:    1
... ...
@@ -805,7 +820,7 @@ pubmedID        26586801
805 820
                 Hsapiens-HOCOMOCOv11A-EGR1_HUMAN.H11MO.0.A                            
806 821
 providerName    EGR1_HUMAN.H11MO.0.A                                                  
807 822
 providerId      EGR1_HUMAN.H11MO.0.A                                                  
808
-dataSource      HOCOMOCOv11A                                                          
823
+dataSource      HOCOMOCOv11-core-A                                                    
809 824
 geneSymbol      EGR1                                                                  
810 825
 geneId          1958                                                                  
811 826
 geneIdType      ENTREZ                                                                
... ...
@@ -1033,7 +1048,7 @@ the grep solution, while serviceable, becomes a little awkward:
1033 1048
 > print (egr1.mouse.jaspar.rows)
1034 1049
 \end{Sinput}
1035 1050
 \begin{Soutput}
1036
-[1] 3878 5032 6153
1051
+[1] 4246 5400 6521
1037 1052
 \end{Soutput}
1038 1053
 \begin{Sinput}
1039 1054
 > egr1.motif <- MotifDb [egr1.mouse.jaspar.rows]