git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/StarBioTrek@122485 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,39 @@ |
1 |
+Package: StarBioTrek |
|
2 |
+Type: Package |
|
3 |
+Title: StarBioTrek |
|
4 |
+Version: 0.99.28 |
|
5 |
+Date: 2016-10-16 |
|
6 |
+Author: Claudia Cava, |
|
7 |
+ Isabella Castiglioni |
|
8 |
+Maintainer: Claudia Cava <claudia.cava@ibfm.cnr.it> |
|
9 |
+Depends: |
|
10 |
+ R (>= 3.3) |
|
11 |
+Imports: |
|
12 |
+ SpidermiR, |
|
13 |
+ KEGGREST, |
|
14 |
+ org.Hs.eg.db, |
|
15 |
+ AnnotationDbi, |
|
16 |
+ e1071, |
|
17 |
+ ROCR, |
|
18 |
+ grDevices |
|
19 |
+Description: This tool StarBioTrek presents some methodologies to measure pathway activity and cross-talk among pathways integrating also the information of network data. |
|
20 |
+License: GPL (>= 3) |
|
21 |
+biocViews: GeneRegulation, |
|
22 |
+ Network, |
|
23 |
+ Pathways, |
|
24 |
+ KEGG |
|
25 |
+Suggests: |
|
26 |
+ BiocStyle, |
|
27 |
+ knitr, |
|
28 |
+ rmarkdown, |
|
29 |
+ testthat, |
|
30 |
+ devtools, |
|
31 |
+ roxygen2, |
|
32 |
+ qgraph, |
|
33 |
+ png, |
|
34 |
+ grid |
|
35 |
+VignetteBuilder: knitr |
|
36 |
+LazyData: true |
|
37 |
+URL: https://github.com/claudiacava/StarBioTrek |
|
38 |
+BugReports: https://github.com/claudiacava/StarBioTrek/issues |
|
39 |
+RoxygenNote: 5.0.1 |
|
0 | 40 |
\ No newline at end of file |
1 | 41 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,27 @@ |
1 |
+# Generated by roxygen2: do not edit by hand |
|
2 |
+ |
|
3 |
+export(GE_matrix) |
|
4 |
+export(SelectedSample) |
|
5 |
+export(average) |
|
6 |
+export(ds_score_crtlk) |
|
7 |
+export(euc_dist_crtlk) |
|
8 |
+export(getKEGGdata) |
|
9 |
+export(getNETdata) |
|
10 |
+export(list_path_net) |
|
11 |
+export(plotting_cross_talk) |
|
12 |
+export(proc_path) |
|
13 |
+export(st_dv) |
|
14 |
+export(svm_classification) |
|
15 |
+importFrom(AnnotationDbi,as.list) |
|
16 |
+importFrom(AnnotationDbi,mappedkeys) |
|
17 |
+importFrom(KEGGREST,keggGet) |
|
18 |
+importFrom(KEGGREST,keggList) |
|
19 |
+importFrom(ROCR,performance) |
|
20 |
+importFrom(ROCR,prediction) |
|
21 |
+importFrom(SpidermiR,SpidermiRdownload_net) |
|
22 |
+importFrom(SpidermiR,SpidermiRquery_spec_networks) |
|
23 |
+importFrom(SpidermiR,SpidermiRquery_species) |
|
24 |
+importFrom(e1071,svm) |
|
25 |
+importFrom(e1071,tune) |
|
26 |
+importFrom(grDevices,rainbow) |
|
27 |
+importFrom(org.Hs.eg.db,org.Hs.egSYMBOL2EG) |
0 | 7 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,64 @@ |
1 |
+#' Download data |
|
2 |
+#' |
|
3 |
+#' StarBioTrek allows you to download sample data from StarBioTrek |
|
4 |
+#' |
|
5 |
+#' The function you're most likely to need from \pkg{StarBioTrek} is |
|
6 |
+#' \code{path_star} |
|
7 |
+#' Otherwise refer to the vignettes to see |
|
8 |
+#' how to format the documentation. |
|
9 |
+#' |
|
10 |
+#' @docType package |
|
11 |
+#' @name StarBioTrek |
|
12 |
+NULL |
|
13 |
+ |
|
14 |
+#' Pathway data from KEGG |
|
15 |
+#' @docType data |
|
16 |
+#' @keywords internal |
|
17 |
+#' @name path |
|
18 |
+#' @format A data frame with rows and variables |
|
19 |
+NULL |
|
20 |
+ |
|
21 |
+#' network data |
|
22 |
+#' @docType data |
|
23 |
+#' @keywords internal |
|
24 |
+#' @name netw |
|
25 |
+#' @format A data frame with rows and variables |
|
26 |
+NULL |
|
27 |
+ |
|
28 |
+ |
|
29 |
+ |
|
30 |
+ |
|
31 |
+#' TCGA data |
|
32 |
+#' @docType data |
|
33 |
+#' @keywords internal |
|
34 |
+#' @name Data_CANCER_normUQ_filt |
|
35 |
+#' @format A data frame with rows and variables |
|
36 |
+NULL |
|
37 |
+ |
|
38 |
+#' Score Matrix of pairwise pathway using euclidean distance |
|
39 |
+#' @docType data |
|
40 |
+#' @keywords internal |
|
41 |
+#' @name score_euc_dist |
|
42 |
+#' @format A data frame with rows and variables |
|
43 |
+NULL |
|
44 |
+ |
|
45 |
+#' TCGA data with normal samples |
|
46 |
+#' @docType data |
|
47 |
+#' @keywords internal |
|
48 |
+#' @name norm |
|
49 |
+#' @format A data frame with rows and variables |
|
50 |
+NULL |
|
51 |
+ |
|
52 |
+#' TCGA data with tumour samples |
|
53 |
+#' @docType data |
|
54 |
+#' @keywords internal |
|
55 |
+#' @name tumo |
|
56 |
+#' @format A data frame with rows and variables |
|
57 |
+NULL |
|
58 |
+ |
|
59 |
+#' A matrix of gene expression for pathways given by the user. |
|
60 |
+#' @docType data |
|
61 |
+#' @keywords internal |
|
62 |
+#' @name list_path_plot |
|
63 |
+#' @format A data frame with rows and variables |
|
64 |
+NULL |
|
0 | 65 |
\ No newline at end of file |
1 | 66 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,204 @@ |
1 |
#' @title Get human KEGG pathway data.
#' @description getKEGGdata builds a gene-by-pathway table for human KEGG
#' pathways. Columns are the pathways and rows are the gene symbols mapped in
#' \code{org.Hs.egSYMBOL2EG}; a cell holds the gene symbol when the gene
#' belongs to the pathway and \code{" "} otherwise.
#' @param KEGG_path character string selecting the group of pathways. One of
#' "Carb_met", "Ener_met", "Lip_met", "Amn_met", "Gly_bio_met",
#' "Cof_vit_met", "Transcript", "Transl", "Fold_degr", "Repl_repair",
#' "sign_transd", "sign_mol_int", "Transp_cat", "cell_grow_d", "cell_comm",
#' "imm_syst", "end_syst", "circ_syst", "dig_syst", "exc_syst", "nerv_syst",
#' "sens_syst", or "KEGG_path" for every human pathway listed by KEGG.
#' @export
#' @importFrom KEGGREST keggList keggGet
#' @importFrom org.Hs.eg.db org.Hs.egSYMBOL2EG
#' @importFrom AnnotationDbi mappedkeys as.list
#' @return character matrix (gene symbols x pathways) with human pathway data
#' @examples
#' path<-getKEGGdata(KEGG_path="Transcript")
getKEGGdata <- function(KEGG_path) {
  if (KEGG_path == "KEGG_path") {
    # Whole-KEGG mode: the pathway codes come straight from keggList().
    pathways.list <- keggList("pathway", "hsa") ## returns the list of human pathways
    pathway.codes <- sub("path:", "", names(pathways.list))
    pathways.list <- list(pathways.list)
    pathways.list <- pathways.list[lapply(pathways.list, length) != 0]
    a <- do.call("cbind", pathways.list)
  } else {
    # The select_path_* helpers never read their argument, so they are
    # called without one.
    mer <- switch(KEGG_path,
      Carb_met = select_path_carb(),
      Ener_met = select_path_en(),
      Lip_met = select_path_lip(),
      Amn_met = select_path_amn(),
      Gly_bio_met = select_path_gly(),
      Cof_vit_met = select_path_cofa(),
      Transcript = select_path_transc(),
      Transl = select_path_transl(),
      Fold_degr = select_path_fold(),
      Repl_repair = select_path_repl(),
      sign_transd = select_path_sign(),
      sign_mol_int = select_path_sign_mol(),
      Transp_cat = select_path_transp_ca(),
      cell_grow_d = select_path_cell_grow(),
      cell_comm = select_path_cell_comm(),
      imm_syst = select_path_imm_syst(),
      end_syst = select_path_end_syst(),
      circ_syst = select_path_circ_syst(),
      dig_syst = select_path_dig_syst(),
      exc_syst = select_path_exc_syst(),
      nerv_syst = select_path_ner_syst(),
      sens_syst = select_path_sens_syst()
    )
    if (is.null(mer)) {
      stop("Unknown KEGG_path: ", KEGG_path, call. = FALSE)
    }
    # proc_path() is defined elsewhere in the package. Its first element is
    # used below as the KEGG pathway ids and its second as the label table.
    selected <- proc_path(mer)
    pathway.codes <- selected[[1]]
    a <- selected[[2]]
  }

  # One KEGG query per pathway; GENE alternates id / description, so keep
  # every other element. simplify = FALSE guarantees a list even when all
  # pathways happen to have the same number of genes.
  genes.by.pathway <- sapply(pathway.codes,
    function(pwid) {
      pw <- keggGet(pwid)
      pw[[1]]$GENE[c(TRUE, FALSE)]
    },
    simplify = FALSE
  )

  x <- org.Hs.egSYMBOL2EG
  mapped_genes <- mappedkeys(x)
  xx <- as.list(x[mapped_genes])
  symbols <- names(xx)

  # Start from an all-blank table and write the symbol wherever one of the
  # gene's mapped ids appears in the pathway.
  top3 <- matrix(" ", length(xx), length(genes.by.pathway))
  rownames(top3) <- symbols
  colnames(top3) <- names(genes.by.pathway)
  for (k in seq_along(genes.by.pathway)) {
    pathway_genes <- genes.by.pathway[[k]]
    in_pathway <- vapply(
      xx,
      function(ids) any(ids %in% pathway_genes),
      logical(1)
    )
    top3[in_pathway, k] <- symbols[in_pathway]
  }

  # Replace a pathway code by its label when the i-th column and the i-th
  # row of the label table refer to the same code.
  # NOTE(review): assumes `a` can be indexed linearly (`a[i]`) - confirm
  # against the shape returned by proc_path().
  rownames(a) <- sub("path:", "", rownames(a))
  PROVA <- top3
  for (i in seq_len(ncol(PROVA))) {
    if (isTRUE(colnames(PROVA)[i] == rownames(a)[i])) {
      colnames(PROVA)[i] <- a[i]
    }
  }
  PROVA
}
|
162 |
+ |
|
163 |
+ |
|
164 |
#' @title Get network data.
#' @description getNETdata creates a data frame with network data.
#' Network category can be filtered among: physical interactions, co-localization, genetic interactions and shared protein domain.
#' @param network variable. The user can use the following parameters
#' based on the network types to be used. PHint for Physical_interactions,
#' COloc for Co-localization, GENint for Genetic_interactions and
#' SHpd for Shared_protein_domains
#' @param organism organism==NULL default value is homo sapiens. Otherwise
#' the name is looked up in the species table returned by
#' \code{SpidermiRquery_species}; a name that is not found falls back to
#' row 9 of that table, which is what every non-NULL value used before.
#' @export
#' @importFrom SpidermiR SpidermiRquery_species SpidermiRquery_spec_networks SpidermiRdownload_net
#' @return dataframe with gene-gene (or protein-protein interactions)
#' @examples
#' organism="Saccharomyces_cerevisiae"
#' netw<-getNETdata(network="SHpd",organism)
getNETdata <- function(network, organism = NULL) {
  # NOTE(review): `species` is not defined in this function. The call only
  # works if SpidermiRquery_species() never evaluates its argument or a
  # global `species` exists - confirm against SpidermiR.
  org_shar_pro <- SpidermiRquery_species(species)

  if (is.null(organism)) {
    # Row 6 is the row used for the documented default (homo sapiens).
    organism_row <- 6
  } else {
    # NOTE(review): presumably the first column of the species table holds
    # the organism names - verify. Unmatched names keep the legacy row 9.
    organism_row <- match(organism[1], as.character(org_shar_pro[, 1]))
    if (is.na(organism_row)) {
      organism_row <- 9
    }
  }

  net_shar_prot <- SpidermiRquery_spec_networks(
    organismID = org_shar_pro[organism_row, ],
    network
  )
  out_net_shar_pro <- SpidermiRdownload_net(net_shar_prot)
  # The downloaded networks are returned as-is; no gene-symbol conversion
  # or de-duplication is applied here.
  out_net_shar_pro
}
|
200 |
+ |
|
201 |
+ |
|
202 |
+ |
|
203 |
+ |
|
204 |
+ |
0 | 205 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,534 @@ |
1 |
+#overlap <- function(net_type,x,currentPathway_genes){ |
|
2 |
+ # de<-net_type[which(net_type$m_shar_pro==x),] |
|
3 |
+# fr<-intersect(de$m2_shar_pro,currentPathway_genes) |
|
4 |
+ # go=list() |
|
5 |
+ #if(length(fr)!=0) { |
|
6 |
+ # for (i in 1:length(fr)){ |
|
7 |
+ # de2<-de[which(de$m2_shar_pro==fr[i]),] |
|
8 |
+ # go[[i]]<-de2 |
|
9 |
+ #} |
|
10 |
+ #} |
|
11 |
+# dst<-do.call("rbind", go) |
|
12 |
+ # return(dst) |
|
13 |
+#} |
|
14 |
+ |
|
15 |
+ |
|
16 |
# Labels of the carbohydrate-metabolism pathways, each followed by
# "- Homo sapiens (human)". The `Carbohydrate` argument is never read.
select_path_carb <- function(Carbohydrate) {
  pathway_titles <- c(
    "Glycolysis / Gluconeogenesis",
    "Citrate cycle (TCA cycle)",
    "Pentose phosphate pathway",
    "Pentose and glucuronate interconversions",
    "Fructose and mannose metabolism",
    "Galactose metabolism",
    "Ascorbate and aldarate metabolism",
    "Starch and sucrose metabolism",
    "Amino sugar and nucleotide sugar metabolism",
    "Pyruvate metabolism",
    "Glyoxylate and dicarboxylate metabolism",
    "Propanoate metabolism",
    "Butanoate metabolism",
    "C5-Branched dibasic acid metabolism",
    "Inositol phosphate metabolism"
  )
  paste(pathway_titles, "- Homo sapiens (human)")
}
|
36 |
+ |
|
37 |
# Labels of the energy-metabolism pathways, each followed by
# "- Homo sapiens (human)". The `Energy` argument is never read.
select_path_en <- function(Energy) {
  human_suffix <- "- Homo sapiens (human)"
  energy_titles <- c(
    "Oxidative phosphorylation",
    "Photosynthesis",
    "Photosynthesis - antenna proteins",
    "Carbon fixation in photosynthetic organisms",
    "Carbon fixation pathways in prokaryotes",
    "Methane metabolism",
    "Nitrogen metabolism",
    "Sulfur metabolism"
  )
  paste(energy_titles, human_suffix)
}
|
50 |
+ |
|
51 |
+ |
|
52 |
# Labels of the lipid-metabolism pathways, each followed by
# "- Homo sapiens (human)". The `Lipid` argument is never read.
select_path_lip <- function(Lipid) {
  lipid_titles <- c(
    "Fatty acid biosynthesis",
    "Fatty acid elongation",
    "Fatty acid degradation",
    "Synthesis and degradation of ketone bodies",
    "Cutin, suberine and wax biosynthesis",
    "Steroid biosynthesis",
    "Primary bile acid biosynthesis",
    "Secondary bile acid biosynthesis",
    "Steroid hormone biosynthesis",
    "Glycerolipid metabolism",
    "Glycerophospholipid metabolism",
    "Ether lipid metabolism",
    "Sphingolipid metabolism",
    "Arachidonic acid metabolism",
    "Linoleic acid metabolism",
    "alpha-Linolenic acid metabolism",
    "Biosynthesis of unsaturated fatty acids"
  )
  paste(lipid_titles, "- Homo sapiens (human)")
}
|
75 |
+ |
|
76 |
+ |
|
77 |
+ |
|
78 |
+ |
|
79 |
# Labels of the amino-acid-metabolism pathways, each followed by
# "- Homo sapiens (human)". The `Aminoacid` argument is never read.
select_path_amn <- function(Aminoacid) {
  amino_titles <- c(
    "Alanine, aspartate and glutamate metabolism",
    "Glycine, serine and threonine metabolism",
    "Cysteine and methionine metabolism",
    "Valine, leucine and isoleucine degradation",
    "Valine, leucine and isoleucine biosynthesis",
    "Lysine biosynthesis",
    "Lysine degradation",
    "Arginine biosynthesis",
    "Arginine and proline metabolism",
    "Histidine metabolism",
    "Tyrosine metabolism",
    "Phenylalanine metabolism",
    "Tryptophan metabolism",
    "Phenylalanine, tyrosine and tryptophan biosynthesis",
    "beta-Alanine metabolism",
    "Taurine and hypotaurine metabolism",
    "Phosphonate and phosphinate metabolism",
    "Selenocompound metabolism",
    "Cyanoamino acid metabolism",
    "D-Glutamine and D-glutamate metabolism",
    "D-Arginine and D-ornithine metabolism",
    "D-Alanine metabolism",
    "Glutathione metabolism"
  )
  paste(amino_titles, "- Homo sapiens (human)")
}
|
108 |
+ |
|
109 |
# Labels of the glycan biosynthesis and metabolism pathways, each followed
# by "- Homo sapiens (human)". The `Glybio_met` argument is never read.
# The species suffix is defined locally so the function does not depend on
# a `species` object existing in an enclosing environment.
select_path_gly <- function(Glybio_met) {
  species <- c("- Homo sapiens (human)")
  glycan_titles <- c(
    "N-Glycan biosynthesis",
    "Various types of N-glycan biosynthesis",
    "Mucin type O-Glycan biosynthesis",
    "Other types of O-glycan biosynthesis",
    "Glycosaminoglycan biosynthesis - CS/DS",
    "Glycosaminoglycan biosynthesis - HS/Hep",
    "Glycosaminoglycan biosynthesis - KS",
    "Glycosaminoglycan degradation",
    "Glycosylphosphatidylinositol(GPI)-anchor biosynthesis",
    "Glycosphingolipid biosynthesis - lacto and neolacto series",
    "Glycosphingolipid biosynthesis - globo series",
    "Glycosphingolipid biosynthesis - ganglio series",
    "Lipopolysaccharide biosynthesis",
    "Peptidoglycan biosynthesis",
    "Other glycan degradation"
  )
  paste(glycan_titles, species)
}
|
128 |
+ |
|
129 |
+ |
|
130 |
+ |
|
131 |
# Labels of the cofactor and vitamin metabolism pathways, each followed by
# "- Homo sapiens (human)". The `Cofa_vita_met` argument is never read.
select_path_cofa <- function(Cofa_vita_met) {
  cofactor_titles <- c(
    "Thiamine metabolism",
    "Riboflavin metabolism",
    "Vitamin B6 metabolism",
    "Nicotinate and nicotinamide metabolism",
    "Pantothenate and CoA biosynthesis",
    "Biotin metabolism",
    "Lipoic acid metabolism",
    "Folate biosynthesis",
    "One carbon pool by folate",
    "Retinol metabolism",
    "Porphyrin and chlorophyll metabolism",
    "Ubiquinone and other terpenoid-quinone biosynthesis"
  )
  paste(cofactor_titles, "- Homo sapiens (human)")
}
|
148 |
+ |
|
149 |
# Labels of the transcription pathways, each followed by
# "- Homo sapiens (human)". The `Transcription` argument is never read.
select_path_transc <- function(Transcription) {
  transcription_titles <- c(
    "RNA polymerase",
    "Basal transcription factors",
    "Spliceosome",
    "Transcription factors",
    "Transcription machinery"
  )
  paste(transcription_titles, "- Homo sapiens (human)")
}
|
159 |
+ |
|
160 |
+ |
|
161 |
+ |
|
162 |
# Labels of the translation pathways, each followed by
# "- Homo sapiens (human)". The `Translation` argument is never read.
select_path_transl <- function(Translation) {
  translation_titles <- c(
    "Ribosome",
    "Aminoacyl-tRNA biosynthesis",
    "RNA transport",
    "mRNA surveillance pathway",
    "Ribosome biogenesis in eukaryotes",
    "Ribosomal proteins",
    "Ribosome biogenesis",
    "Transfer RNA biogenesis",
    "Translation factors"
  )
  paste(translation_titles, "- Homo sapiens (human)")
}
|
176 |
+ |
|
177 |
# Labels of the folding, sorting and degradation pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
select_path_fold <- function(Folding_sorting_and_degradation) {
  folding_titles <- c(
    "Protein export",
    "Protein processing in endoplasmic reticulum",
    "SNARE interactions in vesicular transport",
    "Ubiquitin mediated proteolysis",
    "Sulfur relay system",
    "RNA degradation",
    "Chaperones and folding catalysts",
    "SNAREs",
    "Ubiquitin system",
    "Proteasome"
  )
  paste(folding_titles, "- Homo sapiens (human)")
}
|
192 |
+ |
|
193 |
+ |
|
194 |
+ |
|
195 |
+ |
|
196 |
# Labels of the replication and repair pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
# "DNA repair and recombination proteins" is a single KEGG entry; it is
# kept as one label rather than being split into a stray "proteins" item.
select_path_repl <- function(Replication_and_repair) {
  species <- c("- Homo sapiens (human)")
  repair_titles <- c(
    "DNA replication",
    "Base excision repair",
    "Nucleotide excision repair",
    "Mismatch repair",
    "Homologous recombination",
    "Non-homologous end-joining",
    "Fanconi anemia pathway",
    "DNA replication proteins",
    "Chromosome",
    "DNA repair and recombination proteins"
  )
  paste(repair_titles, species)
}
|
212 |
+ |
|
213 |
+ |
|
214 |
+ |
|
215 |
# Labels of the signal-transduction pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
select_path_sign <- function(Signal_transduction) {
  signalling_titles <- c(
    "Ras signaling pathway",
    "Rap1 signaling pathway",
    "MAPK signaling pathway",
    "ErbB signaling pathway",
    "Wnt signaling pathway",
    "Notch signaling pathway",
    "Hedgehog signaling pathway",
    "TGF-beta signaling pathway",
    "Hippo signaling pathway",
    "VEGF signaling pathway",
    "Jak-STAT signaling pathway",
    "NF-kappa B signaling pathway",
    "TNF signaling pathway",
    "HIF-1 signaling pathway",
    "FoxO signaling pathway",
    "Calcium signaling pathway",
    "Phosphatidylinositol signaling system",
    "Phospholipase D signaling pathway",
    "Sphingolipid signaling pathway",
    "cAMP signaling pathway",
    "cGMP-PKG signaling pathway",
    "PI3K-Akt signaling pathway",
    "AMPK signaling pathway",
    "mTOR signaling pathway"
  )
  paste(signalling_titles, "- Homo sapiens (human)")
}
|
244 |
+ |
|
245 |
+ |
|
246 |
# Labels of the signaling molecules and interaction pathways, each followed
# by "- Homo sapiens (human)". The argument is never read.
select_path_sign_mol <- function(Signaling_molecules_and_interaction) {
  interaction_titles <- c(
    "Neuroactive ligand-receptor interaction",
    "Cytokine-cytokine receptor interaction",
    "ECM-receptor interaction",
    "Cell adhesion molecules (CAMs)"
  )
  paste(interaction_titles, "- Homo sapiens (human)")
}
|
255 |
+ |
|
256 |
+ |
|
257 |
# Labels of the transport and catabolism pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
select_path_transp_ca <- function(Transport_and_catabolism) {
  transport_titles <- c(
    "Endocytosis",
    "Phagosome",
    "Lysosome",
    "Peroxisome",
    "Regulation of autophagy"
  )
  paste(transport_titles, "- Homo sapiens (human)")
}
|
267 |
+ |
|
268 |
# Labels of the cell growth and death pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
select_path_cell_grow <- function(Cell_growth_and_death) {
  growth_titles <- c("Cell cycle", "Apoptosis", "p53 signaling pathway")
  paste(growth_titles, "- Homo sapiens (human)")
}
|
276 |
+ |
|
277 |
+ |
|
278 |
# Labels of the cellular community pathways, each followed by
# "- Homo sapiens (human)". The argument is never read.
# No title carries trailing whitespace, so every label has exactly one
# space before the species suffix.
select_path_cell_comm <- function(Cellular_community) {
  species <- c("- Homo sapiens (human)")
  community_titles <- c(
    "Focal adhesion",
    "Adherens junction",
    "Tight junction",
    "Gap junction",
    "Signaling pathways regulating pluripotency of stem cells"
  )
  paste(community_titles, species)
}
|
288 |
+ |
|
289 |
+ |
|
290 |
# Labels of the immune-system pathways, each followed by
# "- Homo sapiens (human)". The `Immune_system` argument is never read.
select_path_imm_syst <- function(Immune_system) {
  immune_titles <- c(
    "Hematopoietic cell lineage",
    "Complement and coagulation cascades",
    "Platelet activation",
    "Toll-like receptor signaling pathway",
    "Toll and Imd signaling pathway",
    "NOD-like receptor signaling pathway",
    "RIG-I-like receptor signaling pathway",
    "Cytosolic DNA-sensing pathway",
    "Natural killer cell mediated cytotoxicity",
    "Antigen processing and presentation",
    "T cell receptor signaling pathway",
    "B cell receptor signaling pathway",
    "Fc epsilon RI signaling pathway",
    "Fc gamma R-mediated phagocytosis",
    "Leukocyte transendothelial migration",
    "Intestinal immune network for IgA production",
    "Chemokine signaling pathway"
  )
  paste(immune_titles, "- Homo sapiens (human)")
}
|
313 |
+ |
|
314 |
+ |
|
315 |
+ |
|
316 |
+ |
|
317 |
# Labels of the endocrine-system pathways, each followed by
# "- Homo sapiens (human)". The `Endocrine_system` argument is never read.
select_path_end_syst <- function(Endocrine_system) {
  endocrine_titles <- c(
    "Insulin secretion",
    "Insulin signaling pathway",
    "Glucagon signaling pathway",
    "Regulation of lipolysis in adipocytes",
    "Adipocytokine signaling pathway",
    "PPAR signaling pathway",
    "GnRH signaling pathway",
    "Ovarian steroidogenesis",
    "Estrogen signaling pathway",
    "Progesterone-mediated oocyte maturation",
    "Prolactin signaling pathway",
    "Oxytocin signaling pathway",
    "Thyroid hormone synthesis",
    "Thyroid hormone signaling pathway",
    "Melanogenesis",
    "Renin secretion",
    "Renin-angiotensin system",
    "Aldosterone synthesis and secretion"
  )
  paste(endocrine_titles, "- Homo sapiens (human)")
}
|
342 |
+ |
|
343 |
+ |
|
344 |
# Labels of the circulatory-system pathways, each followed by
# "- Homo sapiens (human)". The `Circulatory_system` argument is never read.
select_path_circ_syst <- function(Circulatory_system) {
  circulatory_titles <- c(
    "Cardiac muscle contraction",
    "Adrenergic signaling in cardiomyocytes",
    "Vascular smooth muscle contraction"
  )
  paste(circulatory_titles, "- Homo sapiens (human)")
}
|
352 |
+ |
|
353 |
+ |
|
354 |
# Labels of the digestive-system pathways, each followed by
# "- Homo sapiens (human)". The `Digestive_system` argument is never read.
select_path_dig_syst <- function(Digestive_system) {
  digestive_titles <- c(
    "Salivary secretion",
    "Gastric acid secretion",
    "Pancreatic secretion",
    "Bile secretion",
    "Carbohydrate digestion and absorption",
    "Protein digestion and absorption",
    "Fat digestion and absorption",
    "Vitamin digestion and absorption",
    "Mineral absorption"
  )
  paste(digestive_titles, "- Homo sapiens (human)")
}
|
369 |
+ |
|
370 |
+ |
|
371 |
+ |
|
372 |
# Labels of the excretory-system pathways, each followed by
# "- Homo sapiens (human)". The `Excretory_system` argument is never read.
select_path_exc_syst <- function(Excretory_system) {
  excretory_titles <- c(
    "Vasopressin-regulated water reabsorption",
    "Aldosterone-regulated sodium reabsorption",
    "Endocrine and other factor-regulated calcium reabsorption",
    "Proximal tubule bicarbonate reclamation",
    "Collecting duct acid secretion"
  )
  paste(excretory_titles, "- Homo sapiens (human)")
}
|
384 |
+ |
|
385 |
+ |
|
386 |
# Labels of the nervous-system pathways, each followed by
# "- Homo sapiens (human)". The `Nervous_system` argument is never read.
select_path_ner_syst <- function(Nervous_system) {
  nervous_titles <- c(
    "Glutamatergic synapse",
    "GABAergic synapse",
    "Cholinergic synapse",
    "Dopaminergic synapse",
    "Serotonergic synapse",
    "Long-term potentiation",
    "Long-term depression",
    "Retrograde endocannabinoid signaling",
    "Synaptic vesicle cycle",
    "Neurotrophin signaling pathway"
  )
  paste(nervous_titles, "- Homo sapiens (human)")
}
|
402 |
+ |
|
403 |
+ |
|
404 |
# Labels of the sensory-system pathways, each followed by
# "- Homo sapiens (human)". The `Sensory_system` argument is never read.
select_path_sens_syst <- function(Sensory_system) {
  sensory_titles <- c(
    "Phototransduction",
    "Olfactory transduction",
    "Taste transduction",
    "Inflammatory mediator regulation of TRP channels"
  )
  paste(sensory_titles, "- Homo sapiens (human)")
}
|
413 |
+ |
|
414 |
+ |
|
415 |
+ |
|
416 |
#' @title Select the class of TCGA data
#' @description Keeps the columns of a gene expression matrix whose barcode
#' (column name) carries the requested sample-type code in characters 14-15:
#' code 01 for "tumor", codes 10 and above for "normal". Any other value of
#' \code{typesample} returns the input unchanged.
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal)
#' @export
#' @return a gene expression matrix of the samples with specified label
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_filt,typesample="tumor")[,2]
SelectedSample <- function(Dataset, typesample) {
  # Numeric sample-type code read from positions 14-15 of each column name.
  sample_code <- function() {
    as.numeric(substr(colnames(Dataset), 14, 15))
  }

  # drop = FALSE keeps the two-dimensional shape even when a single sample
  # matches, so callers can always index the result by [row, column].
  if (typesample == "tumor") {
    Dataset <- Dataset[, which(sample_code() == 1), drop = FALSE]
  }

  if (typesample == "normal") {
    Dataset <- Dataset[, which(sample_code() >= 10), drop = FALSE]
  }

  Dataset
}
|
436 |
+ |
|
437 |
+ |
|
438 |
#' @title Select the pairwise pathways whose AUC exceeds a cut-off
#' @description Sort the AUC values in decreasing order and keep the entries
#'   that are strictly greater than the cut-off.
#' @param cutoff cut-off for AUC value
#' @param auc.df list of AUC value (one value per element; the element names
#'   become the row names of the result)
#' @return a data frame with one row per selected entry and two columns: the
#'   result of the comparison with the cut-off and the AUC value. It has zero
#'   rows when no value exceeds the cut-off.
select_class <- function(auc.df, cutoff) {
  ds <- do.call("rbind", auc.df)
  # drop = FALSE keeps the row name when auc.df holds a single entry.
  tmp_ordered <- as.data.frame(ds[order(ds, decreasing = TRUE), , drop = FALSE])
  colnames(tmp_ordered) <- "pathway"

  # The first column is named after this expression, so it is kept as written.
  er <- as.data.frame(tmp_ordered$pathway > cutoff)
  rownames(er) <- rownames(tmp_ordered)
  er[, 2] <- tmp_ordered$pathway

  # Values are sorted in decreasing order, so the selected entries are the
  # leading rows. seq_len() yields an empty selection when nothing passes,
  # whereas 1:0 would pick the first row.
  n_selected <- length(tmp_ordered[tmp_ordered$pathway > cutoff, ])
  er[seq_len(n_selected), ]
}
|
454 |
+ |
|
455 |
+ |
|
456 |
+ |
|
457 |
+ |
|
458 |
#' @title Process matrix TCGA data after the selection of pairwise pathway
#' @description Convert the score columns to numeric, label each row with a
#'   compact "pathway1_pathway2" name and keep the rows that are also row
#'   names of list_perf.
#' @param measure matrix with measure of cross-talk among pathways: columns 1
#'   and 2 hold the two pathway names, the remaining columns hold the scores
#' @param list_perf output of the function select_class
#' @return a data frame of numeric scores restricted to the pathway pairs
#'   found in list_perf
process_matrix <- function(measure, list_perf) {
  # drop = FALSE keeps the table shape when there is a single row or a single
  # score column.
  scoreMatrix <- as.data.frame(measure[, 3:ncol(measure), drop = FALSE])
  scoreMatrix[] <- lapply(
    scoreMatrix,
    function(column) as.numeric(as.character(column))
  )

  # Strip the species suffix, then every underscore and hyphen.
  clean_pathway_name <- function(labels, species_pattern) {
    labels <- gsub(" ", "_", labels)
    labels <- sub(species_pattern, "", labels)
    labels <- gsub("(human)", "", labels, fixed = TRUE)
    labels <- gsub("_", "", labels)
    gsub("-", "", labels)
  }
  # The two columns deliberately keep their original (different) patterns.
  first_name <- clean_pathway_name(measure[, 1], "_-_Homo_sapiens_*")
  second_name <- clean_pathway_name(measure[, 2], "_-_Homo_sapiens_(human)*")
  rownames(scoreMatrix) <- paste(first_name, second_name, sep = "_")

  intera <- intersect(rownames(scoreMatrix), rownames(list_perf))
  scoreMatrix[intera, , drop = FALSE]
}
|
484 |
+ |
|
485 |
+ |
|
486 |
+ |
|
487 |
# Convert a cross-talk measure table into a numeric data frame whose rows are
# labelled with compact "pathway1_pathway2" names.
#
# measure_cell_process: matrix/data frame whose columns 1 and 2 hold the two
#   pathway names and whose remaining columns hold the scores.
# Returns a data frame of numeric scores, one row per pathway pair.
process_matrix_cell_process <- function(measure_cell_process) {
  # drop = FALSE keeps the table shape when there is a single row or a single
  # score column.
  score_columns <- 3:ncol(measure_cell_process)
  score__cell_grow_d <- as.data.frame(
    measure_cell_process[, score_columns, drop = FALSE]
  )
  score__cell_grow_d[] <- lapply(
    score__cell_grow_d,
    function(column) as.numeric(as.character(column))
  )

  # Strip the species suffix, then every underscore and hyphen.
  clean_pathway_name <- function(labels, species_pattern) {
    labels <- gsub(" ", "_", labels)
    labels <- sub(species_pattern, "", labels)
    labels <- gsub("(human)", "", labels, fixed = TRUE)
    labels <- gsub("_", "", labels)
    gsub("-", "", labels)
  }
  # The two columns deliberately keep their original (different) patterns.
  first_name <- clean_pathway_name(
    measure_cell_process[, 1], "_-_Homo_sapiens_*"
  )
  second_name <- clean_pathway_name(
    measure_cell_process[, 2], "_-_Homo_sapiens_(human)*"
  )

  rownames(score__cell_grow_d) <- paste(first_name, second_name, sep = "_")
  score__cell_grow_d
}
|
510 |
+ |
|
511 |
+ |
|
512 |
#' @title Select human KEGG pathways by name.
#' @description proc_path retrieves the list of human KEGG pathways with
#'   keggList and keeps the entries whose description appears in mer.
#' @param mer character vector of pathway names, for example the output of
#'   select_path_carb
#' @export
#' @importFrom KEGGREST keggList
#' @return a list of two elements: the identifiers of the selected pathways
#'   (with any "path:" prefix removed) and the selected descriptions bound
#'   row-wise with rbind (NULL when nothing is selected)
proc_path <- function(mer) {
  pathways.list <- keggList("pathway", "hsa") # the list of human pathways
  # Keep, in KEGG order, every entry whose description is requested in mer.
  selected <- as.list(pathways.list[pathways.list %in% mer])
  pathway.codes <- sub("path:", "", names(selected))
  list(pathway.codes, do.call("rbind", selected))
}
0 | 535 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,405 @@ |
1 |
#' @title Get human KEGG pathway data and network data in order to define the common gene.
#' @description list_path_net creates a list of interacting genes for each human pathway.
#' @param net_type network data as provided by getNETdata (the columns
#'   m_shar_pro and m2_shar_pro are read)
#' @param pathway pathway data as provided by getKEGGdata (one column per pathway)
#' @export
#' @return a list of genes for each pathway (interacting genes belong to that pathway).
#'   Element k corresponds to column k of pathway; pathways without any
#'   network gene are left as NULL entries.
#' @examples
#' list_path<-list_path_net(net_type=netw,pathway=path)
list_path_net <- function(net_type, pathway) {
  # Factor columns are converted so that genes are compared as text.
  is_factor <- vapply(net_type, is.factor, logical(1))
  net_type[is_factor] <- lapply(net_type[is_factor], as.character)

  # Every gene that takes part in at least one interaction.
  network_genes <- unique(c(net_type$m_shar_pro, net_type$m2_shar_pro))

  v <- list()
  for (k in seq_len(ncol(pathway))) {
    shared <- intersect(network_genes, pathway[, k])
    if (length(shared) != 0) {
      print(colnames(pathway)[k])
      v[[k]] <- shared
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  v
}
|
31 |
+ |
|
32 |
+ |
|
33 |
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input .
#' @description GE_matrix creates a matrix of gene expression for pathways given by the user.
#' @param DataMatrix gene expression matrix (eg.TCGA data), genes in rows
#' @param pathway pathway data as provided by getKEGGdata (one column per pathway)
#' @export
#' @return a matrix for each pathway ( gene expression level belong to that pathway):
#'   genes in rows, pathways in columns. A cell holds the mean expression of
#'   the gene across the samples when the gene belongs to the pathway and 0
#'   otherwise. Only rows whose sum is greater than 0 are returned.
#' @examples
#' list_path_plot<-GE_matrix(DataMatrix=tumo[,1:2],pathway=path)
GE_matrix <- function(DataMatrix, pathway) {
  # NOTE(review): sub() replaces only the first space of each name, so the
  # fixed species suffix is not removed when that first space is the one in
  # front of the hyphen - confirm this is intended.
  path_name <- sub(" ", "_", colnames(pathway))
  colnames(pathway) <- gsub(
    " - Homo sapiens (human)", "", path_name, fixed = TRUE
  )

  genes <- rownames(DataMatrix)
  gene_means <- rowMeans(DataMatrix)

  PEAmatrix <- matrix(
    0, nrow(DataMatrix), ncol(pathway),
    dimnames = list(genes, colnames(pathway))
  )
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(genes, pathway[, k])
    if (length(common_genes) != 0) {
      # Report the pathway from the argument rather than from a global object.
      print(colnames(pathway)[k])
      PEAmatrix[common_genes, k] <- gene_means[common_genes]
    }
  }

  # drop = FALSE keeps a matrix when one row or one pathway remains.
  PEAmatrix[which(rowSums(PEAmatrix) > 0), , drop = FALSE]
}
|
67 |
+ |
|
68 |
+ |
|
69 |
#' @title Get human KEGG pathway data and a gene expression matrix we obtain a matrix with the gene expression for only pathways given in input .
#' @description plotting_cross_talk computes the gene-gene correlation of
#'   path_matrix and groups the genes by pathway, ready to be plotted.
#' @param DataMatrix gene expression matrix (eg.TCGA data), genes in rows
#' @param pathway pathway data as provided by getKEGGdata (one column per pathway)
#' @param path_matrix output of the function GE_matrix
#' @export
#' @return a list with two elements: corr, the correlation matrix between the
#'   rows of path_matrix, and gruppi, a list giving for each pathway the
#'   positions of its genes among the rows of corr
#' @examples
#' mt<-plotting_cross_talk(DataMatrix=tumo[,1:2],pathway=path,path_matrix=list_path_plot)
plotting_cross_talk <- function(DataMatrix, pathway, path_matrix) {
  # Renamed once, before the loop, exactly as GE_matrix does. Renaming inside
  # the loop replaced one more space on every iteration.
  path_name <- sub(" ", "_", colnames(pathway))
  colnames(pathway) <- gsub(
    " - Homo sapiens (human)", "", path_name, fixed = TRUE
  )

  genes <- rownames(DataMatrix)
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(genes, pathway[, k])
    if (length(common_genes) != 0) {
      # Report the pathway from the argument rather than from a global object.
      print(colnames(pathway)[k])
      v[[k]] <- common_genes
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  dc <- cor(t(path_matrix))
  vv <- list()
  for (k in seq_along(v)) {
    in_matrix <- intersect(rownames(dc), v[[k]])
    vv[[k]] <- match(in_matrix, rownames(dc))
    names(vv)[k] <- colnames(pathway)[k]
  }

  list(corr = dc, gruppi = vv)
}
|
106 |
+ |
|
107 |
+ |
|
108 |
+ |
|
109 |
+ |
|
110 |
#' @title For TCGA data get human pathway data and creates a matrix with the average of genes for each pathway.
#' @description average creates a matrix with a summarized value for each pathway
#' @param dataFilt TCGA matrix (genes in rows, samples in columns)
#' @param pathway pathway data (one column of gene names per pathway)
#' @export
#' @return a matrix value for each pathway: pathways in rows, samples in
#'   columns, each cell being the mean expression of the pathway genes found
#'   in dataFilt (NaN when none of them is found)
#' @examples
#' score_mean<-average(dataFilt=tumo[,1:2],path)
average <- function(dataFilt, pathway) {
  PEAmatrix <- matrix(0, ncol(pathway), ncol(dataFilt))
  rownames(PEAmatrix) <- colnames(pathway)
  colnames(PEAmatrix) <- colnames(dataFilt)

  for (k in seq_len(nrow(PEAmatrix))) {
    common_genes <- intersect(rownames(dataFilt), pathway[, k])
    # drop = FALSE keeps a one-row matrix when a single gene matches;
    # colSums() fails on a plain vector.
    pathway_expression <- dataFilt[common_genes, , drop = FALSE]
    PEAmatrix[k, ] <- colSums(pathway_expression) / length(common_genes)
  }

  PEAmatrix
}
|
141 |
+ |
|
142 |
+ |
|
143 |
+ |
|
144 |
+ |
|
145 |
+ |
|
146 |
+ |
|
147 |
+ |
|
148 |
+ |
|
149 |
+ |
|
150 |
+ |
|
151 |
+ |
|
152 |
+ |
|
153 |
#' @title For TCGA data get human pathway data and creates a measure of cross-talk among pathways
#' @description euc_dist_crtlk creates a matrix with euclidean distance for pairwise pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names, the following columns hold, for each sample, the
#'   euclidean distance between the average values of the two pathways
#' @examples
#' score_euc_dista<-euc_dist_crtlk(dataFilt=tumo[,1:2],path)
euc_dist_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)

  # Every possible pair of pathways, one pair per row.
  df <- t(combn(rownames(PEAmatrix), 2))

  # Holds the distances: one row per pair, one column per sample.
  ma_d <- matrix(0, nrow(df), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  for (p in seq_len(ncol(PEAmatrix))) {
    # dist() lists the pairs in the same order as combn().
    ma_d[, p] <- dist(PEAmatrix[, p])
  }

  # Prepend the pathway names of each pair.
  cbind(df, ma_d)
}
|
177 |
+ |
|
178 |
+ |
|
179 |
+ |
|
180 |
+ |
|
181 |
#' @title For TCGA data get human pathway data and creates a measure of standard deviations among pathways
#' @description st_dv creates a matrix with standard deviation for pathways
#' @param DataMatrix TCGA matrix (genes in rows, samples in columns)
#' @param pathway pathway data (one column of gene names per pathway)
#' @export
#' @return a matrix value for each pathway: pathways in rows, samples in
#'   columns, each cell being the standard deviation of the pathway genes
#'   found in DataMatrix (NA when fewer than two genes are found)
#' @examples
#' stand_dev<-st_dv(DataMatrix=tumo[,1:2],pathway=path)
st_dv <- function(DataMatrix, pathway) {
  PEAmatrix_sd <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix_sd) <- colnames(pathway)
  colnames(PEAmatrix_sd) <- colnames(DataMatrix)

  for (k in seq_len(nrow(PEAmatrix_sd))) {
    print(colnames(pathway)[k])
    common_genes <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE keeps a one-row matrix when a single gene matches;
    # apply() fails on a plain vector.
    pathway_expression <- DataMatrix[common_genes, , drop = FALSE]
    # Standard deviation of the pathway genes, sample by sample.
    PEAmatrix_sd[k, ] <- apply(pathway_expression, 2, sd)
  }

  PEAmatrix_sd
}
|
211 |
+ |
|
212 |
+ |
|
213 |
+ |
|
214 |
+ |
|
215 |
+ |
|
216 |
+ |
|
217 |
#' @title For TCGA data get human pathway data and creates a measure of discriminating score among pathways
#' @description ds_score_crtlk creates a matrix with discriminating score for pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names, the following columns hold, for each sample, the
#'   distance between the two pathway averages divided by the sum of the two
#'   pathway standard deviations
#' @examples
#' cross_talk_st_dv<-ds_score_crtlk(dataFilt=tumo[,1:2],pathway=path)
ds_score_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)

  # Distance between the averages of every pair of pathways: one row per
  # pair, one column per sample. dist() lists the pairs in combn() order.
  n_pairs <- choose(nrow(PEAmatrix), 2)
  ma_d <- matrix(0, n_pairs, ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  for (p in seq_len(ncol(PEAmatrix))) {
    ma_d[, p] <- dist(PEAmatrix[, p])
  }

  PEAmatrix_sd <- st_dv(dataFilt, pathway)

  # Every possible pair of pathways, one pair per row.
  df <- t(combn(rownames(PEAmatrix_sd), 2))

  # Sum of the two standard deviations of every pair, sample by sample.
  ma <- matrix(0, nrow(df), ncol(PEAmatrix_sd))
  colnames(ma) <- colnames(PEAmatrix_sd)
  for (p in seq_len(ncol(PEAmatrix_sd))) {
    sample_sd <- PEAmatrix_sd[, p]
    ma[, p] <- apply(df, 1, function(pair) sum(sample_sd[pair]))
  }

  # Discriminating score: |M1 - M2| / (S1 + S2).
  score <- ma_d / ma
  cbind(df, score)
}
|
252 |
+ |
|
253 |
+ |
|
254 |
+ |
|
255 |
#' @title SVM classification for each feature
#' @description svm class creates a list with auc value: one SVM is trained
#'   per pathway pair on a random training split and its AUC is measured on
#'   the remaining samples. The ROC curves are drawn while the function runs.
#' @param TCGA_matrix matrix of pairwise pathway scores: columns 1 and 2 hold
#'   the two pathway names, the remaining columns hold one score per sample
#' @param nfs nfs split data into a training and test set (percentage of the
#'   samples of each class used for training)
#' @param tumour barcode samples for a class
#' @param normal barcode samples for another class
#' @export
#' @importFrom e1071 tune svm
#' @importFrom ROCR prediction performance
#' @importFrom grDevices rainbow
#' @return a list with AUC value for pairwise pathway
#' @examples
#' nf <- 60
#' res_class<-svm_classification(TCGA_matrix=score_euc_dist,nfs=nf,
#' normal=colnames(norm[,1:10]),tumour=colnames(tumo[,1:10]))
svm_classification <- function(TCGA_matrix, tumour, normal, nfs) {
  # Score columns as numbers (drop = FALSE keeps the table shape).
  score_columns <- 3:ncol(TCGA_matrix)
  scoreMatrix <- as.data.frame(TCGA_matrix[, score_columns, drop = FALSE])
  scoreMatrix[] <- lapply(
    scoreMatrix,
    function(column) as.numeric(as.character(column))
  )

  # Strip the species suffix, then every underscore and hyphen.
  clean_pathway_name <- function(labels, species_pattern) {
    labels <- gsub(" ", "_", labels)
    labels <- sub(species_pattern, "", labels)
    labels <- gsub("(human)", "", labels, fixed = TRUE)
    labels <- gsub("_", "", labels)
    gsub("-", "", labels)
  }
  # The two columns deliberately keep their original (different) patterns.
  first_name <- clean_pathway_name(TCGA_matrix[, 1], "_-_Homo_sapiens_*")
  second_name <- clean_pathway_name(
    TCGA_matrix[, 2], "_-_Homo_sapiens_(human)*"
  )
  rownames(scoreMatrix) <- paste(first_name, second_name, sep = "_")

  # Samples in rows, pathway pairs in columns, class label in column 1.
  tDataMatrix <- as.data.frame(t(scoreMatrix))
  tDataMatrix <- cbind(Target = 0, tDataMatrix)

  Dataset_g1 <- tDataMatrix[intersect(rownames(tDataMatrix), normal), ]
  Dataset_g3 <- tDataMatrix[intersect(rownames(tDataMatrix), tumour), ]
  Dataset_g1$Target <- 0
  Dataset_g3$Target <- 1

  # Random split: nfs percent of each class for training, the rest for
  # testing. The two sample() calls stay in this order (normal, then tumour).
  tab_g1_training <- sample(
    rownames(Dataset_g1), round(nrow(Dataset_g1) / 100 * nfs)
  )
  tab_g3_training <- sample(
    rownames(Dataset_g3), round(nrow(Dataset_g3) / 100 * nfs)
  )
  in_training_g1 <- rownames(Dataset_g1) %in% tab_g1_training
  in_training_g3 <- rownames(Dataset_g3) %in% tab_g3_training

  training <- rbind(
    Dataset_g1[in_training_g1, ], Dataset_g3[in_training_g3, ]
  )
  testing <- rbind(
    Dataset_g1[!in_training_g1, ], Dataset_g3[!in_training_g3, ]
  )

  x <- subset(training, select = -Target)
  y <- training$Target
  z <- subset(testing, select = -Target)
  zi <- testing$Target

  # One colour per feature; it does not change between iterations.
  curve_colours <- as.matrix(rainbow(ncol(z)))

  auc.df <- list()
  for (j in seq_len(ncol(z))) {
    k <- j + 1 # column of feature j in training (column 1 is Target)
    print(colnames(training)[k])

    # NOTE(review): the grid search uses all the features (x) and does not
    # depend on k, so it could probably be run once before the loop. It is
    # left here because moving it changes the random number stream.
    svm_tune <- tune(
      svm, train.x = x, train.y = y, kernel = "radial",
      ranges = list(cost = 10^(-1:2), gamma = c(.5, 1, 2)), cross = 10
    )

    svm_model_after_tune <- svm(
      Target ~ ., data = training[, c(1, k)], kernel = "radial",
      cost = svm_tune$best.parameters$cost,
      gamma = svm_tune$best.parameters$gamma,
      cross = 10, probability = TRUE
    )

    z3 <- as.data.frame(z[, j])
    colnames(z3) <- colnames(z)[j]
    pred <- predict(
      svm_model_after_tune, z3, decision.values = TRUE, cross = 10
    )

    svm.roc <- prediction(attributes(pred)$decision.values, zi)
    roc_curve <- performance(svm.roc, "tpr", "fpr")
    auc.df[[j]] <- performance(svm.roc, "auc")@y.values[[1]]

    # Features 2 to 5 are added to the current plot; every other feature
    # starts a new plot.
    if (j > 1 && j < 6) {
      plot(roc_curve, col = curve_colours[j], add = TRUE)
      legend(
        "bottomright", colnames(z),
        lty = 1, col = curve_colours, bty = "n", cex = .90, pch = 20,
        ncol = 1
      )
    } else {
      plot(roc_curve, col = curve_colours[j])
    }
  }

  names(auc.df) <- colnames(z)
  auc.df
}
|
405 |
+ |
14 | 23 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,32 @@ |
1 |
# Citation information for the StarBioTrek package.
# Titles and text versions are written as single-line strings so that no raw
# line break ends up inside the rendered citation.
citHeader("To cite StarBioTrek in publications use:")

citEntry(
  entry = "article",
  title = "StarBioTrek miRNA data",
  author = personList(
    as.person("Claudia Cava"),
    as.person("Isabella Castiglioni")
  ),
  journal = "manuscript in preparation",
  year = "2016",
  textVersion = paste(
    "Claudia Cava, Isabella Castiglioni (2016).",
    "StarBioTrek"
  )
)

citEntry(
  entry = "article",
  title = paste(
    "Integrating genetics and epigenetics in breast cancer:",
    "biological insights, experimental, computational methods and",
    "therapeutic potential."
  ),
  author = personList(
    as.person("Claudia Cava"),
    as.person("Gloria Bertoli"),
    as.person("Isabella Castiglioni")
  ),
  journal = "BMC Syst Biol",
  year = "2015",
  volume = 9,
  number = 62,
  textVersion = paste(
    "Cava C, Bertoli G, Castiglioni I.",
    "Integrating genetics and epigenetics in breast cancer:",
    "biological insights, experimental, computational methods and",
    "therapeutic potential. BMC Syst Biol. 2015;9:62"
  )
)
0 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,12 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/StarBioTrek.r |
|
3 |
+\docType{data} |
|
4 |
+\name{Data_CANCER_normUQ_filt} |
|
5 |
+\alias{Data_CANCER_normUQ_filt} |
|
6 |
+\title{TCGA data} |
|
7 |
+\format{A data frame with rows and variables} |
|
8 |
+\description{ |
|
9 |
+TCGA data |
|
10 |
+} |
|
11 |
+\keyword{internal} |
|
12 |
+ |
0 | 13 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,23 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/path_star.R |
|
3 |
+\name{GE_matrix} |
|
4 |
+\alias{GE_matrix} |
|
5 |
+\title{Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input .} |
|
6 |
+\usage{ |
|
7 |
+GE_matrix(DataMatrix, pathway) |
|