Add the function of mapping KEGG IDs and Gene IDs
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: CNEr |
2 |
-Version: 1.11.2 |
|
3 |
-Date: 2016-10-19 |
|
2 |
+Version: 1.11.3 |
|
3 |
+Date: 2016-10-25 |
|
4 | 4 |
Title: CNE Detection and Visualization |
5 | 5 |
Description: Large-scale identification and advanced visualization |
6 | 6 |
of sets of conserved noncoding elements. |
... | ... |
@@ -25,7 +25,8 @@ Imports: Biostrings (>= 2.33.4), |
25 | 25 |
poweRlaw (>= 0.60.3), |
26 | 26 |
annotate (>= 1.50.0), |
27 | 27 |
GO.db (>= 3.3.0), |
28 |
- R.utils (>= 2.3.0) |
|
28 |
+ R.utils (>= 2.3.0), |
|
29 |
+ KEGGREST (>= 1.14.0) |
|
29 | 30 |
Depends: R (>= 3.2.2) |
30 | 31 |
Suggests: Gviz (>= 1.7.4), |
31 | 32 |
BiocStyle, |
... | ... |
@@ -66,4 +67,5 @@ Collate: |
66 | 67 |
WholeGenomeAlignment.R |
67 | 68 |
Ancora.R |
68 | 69 |
CNE-methods.R |
69 |
- GO.R |
|
70 | 70 |
\ No newline at end of file |
71 |
+ GO.R |
|
72 |
+ KEGG.R |
|
71 | 73 |
\ No newline at end of file |
... | ... |
@@ -62,6 +62,7 @@ importFrom(GO.db, GOCCANCESTOR, GOCCOFFSPRING, GOCCCHILDREN, |
62 | 62 |
GOMFANCESTOR, GOMFOFFSPRING, GOMFCHILDREN) |
63 | 63 |
importFrom("grDevices", "jpeg", "pdf", "png", "postscript") |
64 | 64 |
importFrom(R.utils, gunzip, gzip) |
65 |
+importFrom(KEGGREST, keggGet) |
|
65 | 66 |
|
66 | 67 |
### ----------------------------------------------------------------- |
67 | 68 |
### Export S4 classes defined in CNEr |
4 | 10 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
1 |
### -----------------------------------------------------------------
### orgKEGGIds2EntrezIDs: fetch the mapping from the KEGG pathway IDs of one
###   organism to the gene IDs listed for each pathway.
###   The pathway IDs are scraped from the organism's pathway page on
###   www.genome.jp, then the pathway records are downloaded through the
###   KEGG REST server with keggGet.
### Arguments:
###   organism: character(1), matched as a case-insensitive regular expression
###     against the organism names at http://rest.kegg.jp/list/organism.
###     It must match exactly one organism.
### Value:
###   A named list: the names are the pathway ENTRY IDs and each element is
###   a character vector of gene IDs. Pathways without a GENE field are
###   dropped.
### Note:
###   Requires network access. Every REST query is preceded by a random pause
###   of 200 to 400 seconds, so a full run can take hours.
### Exported!
orgKEGGIds2EntrezIDs <- function(organism="Homo sapiens"){
  if(!is.character(organism) || length(organism) != 1L ||
     is.na(organism) || !nzchar(organism)){
    stop("'organism' must be a single non-empty character string.")
  }

  ## Species mapping: the second column is used as the KEGG organism code,
  ## the third column (V3) is the organism name we match against.
  organismMapping <- read.table("http://rest.kegg.jp/list/organism",
                                header=FALSE, sep="\t", quote="",
                                comment.char="", stringsAsFactors=FALSE)
  organismID <- organismMapping[grepl(organism, organismMapping$V3,
                                      ignore.case=TRUE),
                                2, drop=TRUE]
  organismID <- unique(as.character(organismID))
  if(length(organismID) == 0L){
    stop("The provided organism is not available. ",
         "Please refer to http://rest.kegg.jp/list/organism for available ",
         "organisms.")
  }
  if(length(organismID) > 1L){
    ## A vector of codes would build several URLs and make readLines fail
    ## with an unhelpful message, so report the ambiguity explicitly.
    stop("The provided organism matches several KEGG organisms: ",
         paste(organismID, collapse=", "),
         ". Please provide a more specific name.")
  }

  ## Scrape the pathway numbers from the organism's pathway page.
  html <- readLines(paste0("http://www.genome.jp/kegg-bin/show_organism",
                           "?menu_type=pathway_maps&org=", organismID))
  html <- grep("^\\d{5}&", html, value=TRUE)
  ### Hopefully the ID is always 5-digit
  if(length(html) == 0L){
    ## Without this guard paste0 would recycle the empty vector and query
    ## the bare organism code.
    stop("No pathway could be found for the organism '", organismID, "'.")
  }
  pathwayIDs <- paste0(organismID, substr(html, 1L, 5L))

  ## Consecutive chunks of at most 10 pathway IDs, one chunk per query.
  pathwayIDs <- split(pathwayIDs, ceiling(seq_along(pathwayIDs) / 10))

  ## query with KEGG Rest server with 10 entries (maximal) a time,
  ## and 200s to 400s gap between each query.
  query <- lapply(pathwayIDs,
                  function(x){
                    Sys.sleep(sample(200L:400L, size=1L))
                    keggGet(x)
                  })

  ## Get the Pathway IDs to Entrez Gene IDs mapping
  pathwayIDs2GeneIDs <- list()
  for(chunk in query){
    for(pathway in chunk){
      genesInfo <- pathway$GENE
      if(length(genesInfo) == 0L){
        ## No gene annotation for this pathway: leave it out of the result.
        next
      }
      ## Keep the odd-positioned elements of GENE; presumably the IDs
      ## alternate with gene descriptions -- TODO confirm against KEGGREST.
      pathwayIDs2GeneIDs[[pathway$ENTRY[[1L]]]] <- genesInfo[c(TRUE, FALSE)]
    }
  }
  return(pathwayIDs2GeneIDs)
}
|
0 | 54 |
\ No newline at end of file |
1 | 55 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+\name{orgKEGGIds2EntrezIDs} |
|
2 |
+\alias{orgKEGGIds2EntrezIDs} |
|
3 |
+\title{ |
|
4 |
+ Fetch mapping from KEGG IDs to Entrez IDs |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+ Given the desired organism name, fetch the mapping between KEGG IDs and Entrez |
|
8 |
+ gene IDs. |
|
9 |
+} |
|
10 |
+\usage{ |
|
11 |
+ orgKEGGIds2EntrezIDs(organism="Homo sapiens") |
|
12 |
+} |
|
13 |
+ |
|
14 |
+\arguments{ |
|
15 |
+ \item{organism}{ |
|
16 |
+ \code{character}(1): the name of the organism to query. It has to be available |
|
17 |
+ at http://rest.kegg.jp/list/organism. |
|
18 |
+ } |
|
19 |
+} |
|
20 |
+\value{ |
|
21 |
+ A named \code{list} of character vectors of Entrez gene IDs, one per KEGG pathway, with the KEGG pathway IDs as names. |
|
22 |
+} |
|
23 |
+\author{ |
|
24 |
+ Ge Tan |
|
25 |
+} |
|
26 |
+ |
|
27 |
+\examples{ |
|
28 |
+ \donttest{ |
|
29 |
+ orgKEGGIds2EntrezIDs(organism="Homo sapiens") |
|
30 |
+ } |
|
31 |
+} |
|
0 | 32 |
\ No newline at end of file |