Browse code

add function grab_KO_data and data to run it

Shana White authored on 10/07/2018 20:12:46
Showing 6 changed files

... ...
@@ -13,6 +13,7 @@ export(get_KGML)
13 13
 export(get_drug_table)
14 14
 export(get_fisher_info)
15 15
 export(get_graph_object)
16
+export(grab_KO_data)
16 17
 export(keggerize_edges)
17 18
 export(node_mapping_info)
18 19
 export(overlap_info)
19 20
new file mode 100644
... ...
@@ -0,0 +1,57 @@
1
+#' Quickly access LINCS L1000 CGS's for set of perturbagens (KO's)
2
+#' @description Translate raw CGS data to easy-to-use format
3
+#' @export
4
+#' @param cell_line Choose from the set of cell lines: 
5
+#' (A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3,
6
+#' SHSY5Y,SKL,SW480,VCAP)  
7
+#' @param edge_id The numeric value for the edge_id
8
+#' @param data_type Choose from data types: (100_full, 100_bing, 50_lm)
9
+#' @param pert_time Choose from (6,24,48,96,120,144,168)
10
+#' @param pathway_nodes Keep NA unless certain set of perturbagens is designated
11
+#' @return A data frame with conveniently formatted LINCS L100 CGS
12
+#' @examples 
13
+#' MCF_LM_50 <- grab_KO_data("MCF7")
14
+#' MCF_BING_100 <- grab_KO_data("MCF7", data_type = "100_bing")
15
+#' 
16
+grab_KO_data <- function(cell_line, pert_time = 96, data_type = "50_lm", 
17
+                         pathway_nodes = NA){
18
+    data("KO_data", envir = environment())
19
+    KO_data <- get("KO_data")
20
+    keeps <- c(names(KO_data[1:3]), paste0("up",data_type), 
21
+               paste0("dn",data_type))    
22
+    suppressWarnings(    
23
+        if(is.na(pathway_nodes)){
24
+            KO_by_CT <- KO_data[KO_data$pert_time == pert_time & 
25
+                                    KO_data$cell_id == cell_line, keeps]
26
+        }
27
+    )
28
+    suppressWarnings(
29
+        if(!is.na(pathway_nodes)){
30
+            KO_by_CT <- KO_data[KO_data$pert_time == pert_time & 
31
+                                    KO_data$cell_id == cell_line &
32
+                                    KO_data$pert_desc %in% pathway_nodes, keeps]
33
+        }
34
+    )
35
+    names(KO_by_CT)[c(4,5)] <- c("up", "down")
36
+    KO_by_CT <- KO_by_CT[,c(2,4:5)]
37
+    data("conversion_key")
38
+    data("L1000_LM_genes")
39
+    
40
+    for (i in 1:nrow(KO_by_CT)){
41
+        
42
+        KO_by_CT$up_SYMBOL[i] <- 
43
+            list(conversion_key$pr_gene_symbol[
44
+                which(conversion_key$pr_id %in% 
45
+                unlist(strsplit(KO_by_CT$up[i], ";")))])
46
+        KO_by_CT$down_SYMBOL[i] <-
47
+            list(conversion_key$pr_gene_symbol[
48
+                which(conversion_key$pr_id %in% 
49
+                unlist(strsplit(KO_by_CT$down[i], ";")))])
50
+        KO_by_CT$up_count[i] <- length(unlist(KO_by_CT$up_SYMBOL[i]))
51
+        KO_by_CT$down_count[i] <- length(unlist(KO_by_CT$down_SYMBOL[i]))
52
+    }
53
+    KO_by_CT <- KO_by_CT[,c(1,4,5)]
54
+    names(KO_by_CT) <- c("knockout", "up", "down")
55
+    rownames(KO_by_CT) <- KO_by_CT$knockout
56
+    return(KO_by_CT)
57
+}
0 58
\ No newline at end of file
1 59
new file mode 100644
2 60
Binary files /dev/null and b/data/L1000_LM_genes.rda differ
3 61
new file mode 100644
4 62
Binary files /dev/null and b/data/conversion_key.rda differ
5 63
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/grab_KO_data.R
3
+\name{grab_KO_data}
4
+\alias{grab_KO_data}
5
+\title{Quickly access LINCS L1000 CGS's for set of perturbagens (KO's)}
6
+\usage{
7
+grab_KO_data(cell_line, pert_time = 96, data_type = "50_lm",
8
+  pathway_nodes = NA)
9
+}
10
+\arguments{
11
+\item{cell_line}{Choose from the set of cell lines: 
12
+(A375,A549,ASC,HA1E,HCC515,HEK293T,HEKTE,HEPG2,HT29,MCF7,NCIH716,NPC,PC3,
13
+SHSY5Y,SKL,SW480,VCAP)}
14
+
15
+\item{pert_time}{Choose from (6,24,48,96,120,144,168)}
16
+
17
+\item{data_type}{Choose from data types: (100_full, 100_bing, 50_lm)}
18
+
19
+\item{pathway_nodes}{Keep NA unless certain set of perturbagens is designated}
20
+
21
+\item{edge_id}{The numeric value for the edge_id}
22
+}
23
+\value{
24
+A data frame with conveniently formatted LINCS L100 CGS
25
+}
26
+\description{
27
+Translate raw CGS data to easy-to-use format
28
+}
29
+\examples{
30
+MCF_LM_50 <- grab_KO_data("MCF7")
31
+MCF_BING_100 <- grab_KO_data("MCF7", data_type = "100_bing")
32
+
33
+}
0 34
new file mode 100644
... ...
@@ -0,0 +1,97 @@
1
+#!/usr/bin/env bash
2
+
3
+# This script should be run after cloning, it will update the remote
4
+# information so that git svn commands work properly for both the master and
5
+# release branches.
6
+
7
+set -eou pipefail
8
+IFS=$'\n\t'
9
+
10
+set +u
11
+package=$1
12
+
13
+# otherwise use the default repo name
14
+if [ -z "$package" ];then
15
+  package=$(git remote -v | perl -ne 'if (m!/([^/]+?)(?:.git)?\s!) { print $1; exit}')
16
+fi
17
+set -u
18
+
19
+base_url="https://hedgehog.fhcrc.org/bioconductor/"
20
+
21
+is_mirror_clone () {
22
+  git remote show origin | grep -i -q bioconductor-mirror
23
+  return $?
24
+}
25
+
26
+add_release_tracking () {
27
+  local branch=$1
28
+  shift
29
+  local remote=$1
30
+  shift
31
+  for release_branch in $@; do
32
+    svn_branch=$(echo $release_branch | perl -ne 'if (/release-(\d+)\.(\d+)/) { print "RELEASE_$1_$2"; }')
33
+    svn_url="$base_url/branches/$svn_branch/madman/Rpacks/$package"
34
+    git config --add svn-remote.$release_branch.url $svn_url
35
+    git config --add svn-remote.$release_branch.fetch :refs/remotes/git-svn-$release_branch
36
+    git update-ref refs/remotes/git-svn-$release_branch refs/$remote/$release_branch
37
+  done
38
+}
39
+
40
+add_branch () {
41
+  set +eu
42
+  local local_branch=$1
43
+  local remote_branch=${2-$local_branch}
44
+  if ! git branch --track $local_branch bioc/$remote_branch  2>/dev/null 1>&2; then
45
+    1>&2 cat <<END
46
+$local_branch already exists, create a custom branch to track bioc/$remote_branch with
47
+  \`git branch --track NEW_NAME bioc/$remote_branch\`
48
+END
49
+  fi
50
+}
51
+
52
+if is_mirror_clone; then
53
+    git checkout master
54
+    git svn init "$base_url/trunk/madman/Rpacks/$package"
55
+    git update-ref refs/remotes/git-svn refs/remotes/origin/master
56
+    git svn rebase
57
+    git remote add bioc https://github.com/Bioconductor-mirror/${package}.git
58
+    git fetch bioc 2>/dev/null 1>&2
59
+
60
+    release_branches=$(git branch -r | perl -ne 'if (m!origin/(release-.*)!) { print $1, "\n" }')
61
+    for release_branch in ${release_branches[@]}; do
62
+      add_branch $release_branch
63
+    done
64
+
65
+    add_release_tracking origin heads $release_branches
66
+
67
+    cat <<\END
68
+Commit to git as normal, when you want to push your commits to svn
69
+  1. `git svn rebase` to get the latest SVN changes.
70
+  2. `git svn dcommit --add-author-from` to commit your changes to SVN.
71
+END
72
+
73
+else
74
+    git remote add bioc "https://github.com/Bioconductor-mirror/${package}.git"
75
+    git fetch bioc 2>/dev/null 1>&2
76
+    git config --add svn-remote.devel.url "$base_url/trunk/madman/Rpacks/$package"
77
+    git config --add svn-remote.devel.fetch :refs/remotes/git-svn-devel
78
+    git update-ref refs/remotes/git-svn-devel refs/remotes/bioc/master
79
+
80
+    release_branches=$(git branch -r | perl -ne 'if (m!bioc/(release-.*)!) { print $1, "\n" }')
81
+    add_release_tracking bioc remotes/bioc $release_branches
82
+
83
+    for release_branch in ${release_branches[@]}; do
84
+      add_branch $release_branch
85
+    done
86
+    add_branch devel master
87
+    cat <<\END
88
+Commit to git as normal, when you want to push your commits to svn
89
+  1. `git checkout devel` to switch to the devel branch. (use release-X.X for
90
+        release branches)
91
+  2. `git svn rebase` to get the latest SVN changes.
92
+  3. `git merge master --log` to merge your changes from the master branch
93
+        or skip this step and work directly on the current branch.
94
+  4. `git svn dcommit --add-author-from` to sync and commit
95
+        your changes to svn.
96
+END
97
+fi