Browse code

version 1.11.3

Add the function of mapping KEGG IDs and Gene IDs

Ge Tan authored on 25/10/2016 15:12:06
Showing 5 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: CNEr 
2
-Version: 1.11.2
3
-Date: 2016-10-19
2
+Version: 1.11.3
3
+Date: 2016-10-25
4 4
 Title: CNE Detection and Visualization
5 5
 Description: Large-scale identification and advanced visualization 
6 6
              of sets of conserved noncoding elements.
... ...
@@ -25,7 +25,8 @@ Imports: Biostrings (>= 2.33.4),
25 25
          poweRlaw (>= 0.60.3),
26 26
          annotate (>= 1.50.0),
27 27
          GO.db (>= 3.3.0),
28
-         R.utils (>= 2.3.0)
28
+         R.utils (>= 2.3.0),
29
+         KEGGREST (>= 1.14.0)
29 30
 Depends: R (>= 3.2.2)
30 31
 Suggests: Gviz (>= 1.7.4),
31 32
           BiocStyle,
... ...
@@ -66,4 +67,5 @@ Collate:
66 67
         WholeGenomeAlignment.R
67 68
         Ancora.R
68 69
         CNE-methods.R
69
-        GO.R
70 70
\ No newline at end of file
71
+        GO.R
72
+        KEGG.R
71 73
\ No newline at end of file
... ...
@@ -62,6 +62,7 @@ importFrom(GO.db, GOCCANCESTOR, GOCCOFFSPRING, GOCCCHILDREN,
62 62
                   GOMFANCESTOR, GOMFOFFSPRING, GOMFCHILDREN)
63 63
 importFrom("grDevices", "jpeg", "pdf", "png", "postscript")
64 64
 importFrom(R.utils, gunzip, gzip)
65
+importFrom(KEGGREST, keggGet)
65 66
 
66 67
 ### -----------------------------------------------------------------
67 68
 ### Export S4 classes defined in CNEr
... ...
@@ -1,3 +1,9 @@
1
+CHANGES IN Bioc 3.5
2
+------------------------
3
+NEW FEATURES
4
+    o Add function orgKEGGIds2EntrezIDs to fetch the mapping between KEGG IDs
5
+      and Entrez IDs
6
+    
1 7
 CHANGES IN Bioc 3.4
2 8
 ------------------------
3 9
 NEW FEATURES
4 10
new file mode 100644
... ...
@@ -0,0 +1,53 @@
1
### -----------------------------------------------------------------
### orgKEGGIds2EntrezIDs: Look up the KEGG organism code of a species,
###   parse the KEGG pathway-map listing page of that organism to collect
###   its pathway IDs, fetch every pathway record with keggGet() and return
###   the gene IDs listed in each record.
### Arguments:
###   organism: character(1). Used as a case-insensitive regular expression
###             against the organism names (3rd column) of
###             http://rest.kegg.jp/list/organism.
### Value:
###   A named list; names are the ENTRY fields of the fetched pathway
###   records, values are the gene IDs taken from the GENE field.
###   Pathways without a GENE field are omitted.
### Note:
###   Requires network access and is slow on purpose: it waits 200s to 400s
###   before every batch of (at most) 10 pathways sent to the KEGG server.
### Exported!
orgKEGGIds2EntrezIDs <- function(organism="Homo sapiens"){
  stopifnot(is.character(organism), length(organism) == 1L,
            !is.na(organism))

  ## Species mapping: the organism code is read from column 2 and the
  ## organism name is matched in column 3.
  ## stringsAsFactors=FALSE keeps both columns as character on R < 4.0.
  organismMapping <- read.table("http://rest.kegg.jp/list/organism",
                                header=FALSE, sep="\t", quote="",
                                comment.char="", stringsAsFactors=FALSE)
  hits <- grepl(organism, organismMapping$V3, ignore.case=TRUE)
  if(!any(hits)){
    stop("The provided organism is not available. ",
         "Please refer to http://rest.kegg.jp/list/organism for available ",
         "organisms")
  }
  if(sum(hits) > 1L){
    ## Several organisms match the pattern. Try to narrow it down to the
    ## one whose name, without a trailing "(...)" part, equals `organism`.
    ## NOTE(review): assumes names look like "Homo sapiens (human)" - confirm.
    bareNames <- sub("\\s*\\(.*\\)$", "", organismMapping$V3)
    exact <- hits & tolower(bareNames) == tolower(organism)
    if(sum(exact) != 1L){
      candidates <- organismMapping$V3[hits]
      shown <- paste(head(candidates, 10L), collapse="; ")
      if(length(candidates) > 10L){
        shown <- paste0(shown, "; ...")
      }
      stop("The provided organism matches ", length(candidates),
           " KEGG organisms: ", shown, ". ",
           "Please refer to http://rest.kegg.jp/list/organism and provide ",
           "a more specific name")
    }
    hits <- exact
  }
  organismID <- organismMapping[hits, 2, drop=TRUE]

  ## Pathway IDs: keep the lines of the listing page that start with a
  ## 5-digit number followed by "&".
  html <- readLines(paste0("http://www.genome.jp/kegg-bin/show_organism",
                           "?menu_type=pathway_maps&org=", organismID))
  html <- grep("^\\d{5}&", html, value=TRUE)
  ### Hopefully the ID is always 5-digit
  if(length(html) == 0L){
    ## Without this check paste0() would recycle the empty vector and the
    ## bare organism code would be queried as if it were a pathway ID.
    stop("No pathway IDs could be parsed from the KEGG pathway page of ",
         "organism code '", organismID, "'")
  }
  pathwayIDs <- paste0(organismID, substr(html, 1L, 5L))
  ## Consecutive batches of at most 10 IDs each.
  pathwayIDs <- split(pathwayIDs, ceiling(seq_along(pathwayIDs) / 10))

  ## query with KEGG Rest server with 10 entries (maximal) a time,
  ## and 200s to 400s gap between each query.
  query <- lapply(pathwayIDs,
                  function(x){
                    Sys.sleep(sample(200L:400L, size=1L))
                    keggGet(x)
                  })

  ## Get the Pathway IDs to Entrez Gene IDs mapping.
  ## Iterating over the elements directly is safe for empty batches.
  pathwayIDs2GeneIDs <- list()
  for(batch in query){
    for(pathway in batch){
      genesInfo <- pathway[["GENE"]]
      if(is.null(genesInfo) || length(genesInfo) == 0L){
        ## Pathways without genes are left out of the result.
        next
      }
      ## Every other element, starting with the first, is kept as gene ID.
      ## NOTE(review): relies on GENE alternating ID / description - confirm
      ## against the installed KEGGREST version.
      pathwayIDs2GeneIDs[[as.character(pathway[["ENTRY"]])]] <-
        genesInfo[seq(1L, length(genesInfo), by=2L)]
    }
  }
  return(pathwayIDs2GeneIDs)
}
0 54
\ No newline at end of file
1 55
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+\name{orgKEGGIds2EntrezIDs}
2
+\alias{orgKEGGIds2EntrezIDs}
3
+\title{
4
+  Fetch mapping from KEGG IDs to Entrez IDs
5
+}
6
+\description{
7
+  Given the desired organism name, fetch the mapping between KEGG IDs and Entrez
8
+  gene IDs.
9
+}
10
+\usage{
11
+  orgKEGGIds2EntrezIDs(organism="Homo sapiens")
12
+}
13
+
14
+\arguments{
15
+  \item{organism}{
16
+    \code{character}(1): the name of organism to query. It has to be available
17
+    at http://rest.kegg.jp/list/organism.
18
+  }
19
+}
20
+\value{
21
+  A \code{list} of Entrez gene IDs with KEGG IDs as names.
22
+}
23
+\author{
24
+  Ge Tan
25
+}
26
+
27
+\examples{
28
+  \donttest{
29
+    orgKEGGIds2EntrezIDs(organism="Homo sapiens")
30
+  }
31
+}
0 32
\ No newline at end of file