1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,40 +0,0 @@ |
1 |
-Package: StarBioTrek |
|
2 |
-Type: Package |
|
3 |
-Title: StarBioTrek |
|
4 |
-Version: 1.3.1 |
|
5 |
-Date: 06-05-2017 |
|
6 |
-Author: Claudia Cava, |
|
7 |
- Isabella Castiglioni |
|
8 |
-Maintainer: Claudia Cava <claudia.cava@ibfm.cnr.it> |
|
9 |
-Depends: |
|
10 |
- R (>= 3.3) |
|
11 |
-Imports: |
|
12 |
- SpidermiR, |
|
13 |
- KEGGREST, |
|
14 |
- org.Hs.eg.db, |
|
15 |
- AnnotationDbi, |
|
16 |
- e1071, |
|
17 |
- ROCR, |
|
18 |
- grDevices, |
|
19 |
- igraph |
|
20 |
-Description: StarBioTrek provides methods to measure pathway activity and cross-talk among pathways, also integrating information from network data. |
|
21 |
-License: GPL (>= 3) |
|
22 |
-biocViews: GeneRegulation, |
|
23 |
- Network, |
|
24 |
- Pathways, |
|
25 |
- KEGG |
|
26 |
-Suggests: |
|
27 |
- BiocStyle, |
|
28 |
- knitr, |
|
29 |
- rmarkdown, |
|
30 |
- testthat, |
|
31 |
- devtools, |
|
32 |
- roxygen2, |
|
33 |
- qgraph, |
|
34 |
- png, |
|
35 |
- grid |
|
36 |
-VignetteBuilder: knitr |
|
37 |
-LazyData: true |
|
38 |
-URL: https://github.com/claudiacava/StarBioTrek |
|
39 |
-BugReports: https://github.com/claudiacava/StarBioTrek/issues |
|
40 |
-RoxygenNote: 6.0.1 |
41 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,33 +0,0 @@ |
1 |
-# Generated by roxygen2: do not edit by hand |
|
2 |
- |
|
3 |
-export(GE_matrix) |
|
4 |
-export(SelectedSample) |
|
5 |
-export(average) |
|
6 |
-export(ds_score_crtlk) |
|
7 |
-export(euc_dist_crtlk) |
|
8 |
-export(getKEGGdata) |
|
9 |
-export(getNETdata) |
|
10 |
-export(list_path_net) |
|
11 |
-export(matrix_plot) |
|
12 |
-export(path_net) |
|
13 |
-export(plotting_cross_talk) |
|
14 |
-export(proc_path) |
|
15 |
-export(st_dv) |
|
16 |
-export(svm_classification) |
|
17 |
-importFrom(AnnotationDbi,as.list) |
|
18 |
-importFrom(AnnotationDbi,mappedkeys) |
|
19 |
-importFrom(KEGGREST,keggGet) |
|
20 |
-importFrom(KEGGREST,keggList) |
|
21 |
-importFrom(ROCR,performance) |
|
22 |
-importFrom(ROCR,prediction) |
|
23 |
-importFrom(SpidermiR,SpidermiRdownload_net) |
|
24 |
-importFrom(SpidermiR,SpidermiRprepare_NET) |
|
25 |
-importFrom(SpidermiR,SpidermiRquery_spec_networks) |
|
26 |
-importFrom(SpidermiR,SpidermiRquery_species) |
|
27 |
-importFrom(e1071,svm) |
|
28 |
-importFrom(e1071,tune) |
|
29 |
-importFrom(grDevices,rainbow) |
|
30 |
-importFrom(igraph,get.data.frame) |
|
31 |
-importFrom(igraph,graph.data.frame) |
|
32 |
-importFrom(igraph,induced.subgraph) |
|
33 |
-importFrom(org.Hs.eg.db,org.Hs.egSYMBOL2EG) |
6 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,64 +0,0 @@ |
1 |
-#' Download data |
|
2 |
-#' |
|
3 |
-#' StarBioTrek allows you to Download data of samples from StarBioTrek |
|
4 |
-#' |
|
5 |
-#' The function you're most likely to need from \pkg{StarBioTrek} is |
|
6 |
-#' \code{path_star} |
|
7 |
-#' Otherwise refer to the vignettes to see |
|
8 |
-#' how to format the documentation. |
|
9 |
-#' |
|
10 |
-#' @docType package |
|
11 |
-#' @name StarBioTrek |
|
12 |
-NULL |
|
13 |
- |
|
14 |
-#' Pathway data from KEGG |
|
15 |
-#' @docType data |
|
16 |
-#' @keywords internal |
|
17 |
-#' @name path |
|
18 |
-#' @format A data frame with rows and variables |
|
19 |
-NULL |
|
20 |
- |
|
21 |
-#' network data |
|
22 |
-#' @docType data |
|
23 |
-#' @keywords internal |
|
24 |
-#' @name netw |
|
25 |
-#' @format A data frame with rows and variables |
|
26 |
-NULL |
|
27 |
- |
|
28 |
- |
|
29 |
- |
|
30 |
- |
|
31 |
-#' TCGA data |
|
32 |
-#' @docType data |
|
33 |
-#' @keywords internal |
|
34 |
-#' @name Data_CANCER_normUQ_filt |
|
35 |
-#' @format A data frame with rows and variables |
|
36 |
-NULL |
|
37 |
- |
|
38 |
-#' Score Matrix of pairwise pathway using euclidean distance |
|
39 |
-#' @docType data |
|
40 |
-#' @keywords internal |
|
41 |
-#' @name score_euc_dist |
|
42 |
-#' @format A data frame with rows and variables |
|
43 |
-NULL |
|
44 |
- |
|
45 |
-#' TCGA data with normal samples |
|
46 |
-#' @docType data |
|
47 |
-#' @keywords internal |
|
48 |
-#' @name norm |
|
49 |
-#' @format A data frame with rows and variables |
|
50 |
-NULL |
|
51 |
- |
|
52 |
-#' TCGA data with tumour samples |
|
53 |
-#' @docType data |
|
54 |
-#' @keywords internal |
|
55 |
-#' @name tumo |
|
56 |
-#' @format A data frame with rows and variables |
|
57 |
-NULL |
|
58 |
- |
|
59 |
-#' A matrix of gene expression for pathways given by the user. |
|
60 |
-#' @docType data |
|
61 |
-#' @keywords internal |
|
62 |
-#' @name list_path_plot |
|
63 |
-#' @format A data frame with rows and variables |
|
64 |
-NULL |
|
65 | 0 |
\ No newline at end of file |
66 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,207 +0,0 @@ |
1 |
#' @title Get human KEGG pathway data.
#' @description getKEGGdata builds a character matrix relating human gene
#'   symbols (rows, taken from \code{org.Hs.egSYMBOL2EG}) to KEGG pathways
#'   (columns). A cell holds the gene symbol when one of the identifiers mapped
#'   to that symbol is listed in the pathway's \code{GENE} entry, and a single
#'   blank (\code{" "}) otherwise.
#' @param KEGG_path character string selecting the pathway group. One of
#'   \code{"Carb_met"}, \code{"Ener_met"}, \code{"Lip_met"}, \code{"Amn_met"},
#'   \code{"Gly_bio_met"}, \code{"Cof_vit_met"}, \code{"Transcript"},
#'   \code{"Transl"}, \code{"Fold_degr"}, \code{"Repl_repair"},
#'   \code{"sign_transd"}, \code{"sign_mol_int"}, \code{"Transp_cat"},
#'   \code{"cell_grow_d"}, \code{"cell_comm"}, \code{"imm_syst"},
#'   \code{"end_syst"}, \code{"circ_syst"}, \code{"dig_syst"},
#'   \code{"exc_syst"}, \code{"nerv_syst"}, \code{"sens_syst"}, or
#'   \code{"KEGG_path"} for every human pathway returned by
#'   \code{keggList("pathway", "hsa")}. Any other value is an error.
#' @export
#' @importFrom KEGGREST keggList keggGet
#' @importFrom org.Hs.eg.db org.Hs.egSYMBOL2EG
#' @importFrom AnnotationDbi mappedkeys as.list
#' @return character matrix with one row per mapped gene symbol and one column
#'   per pathway
#' @examples
#' path<-getKEGGdata(KEGG_path="Transcript")
getKEGGdata <- function(KEGG_path) {
  if (!is.character(KEGG_path) || length(KEGG_path) != 1L || is.na(KEGG_path)) {
    stop("'KEGG_path' must be a single character string", call. = FALSE)
  }

  # Group keyword -> function returning the pathway titles of that group.
  # The selectors never read their argument, so they are called without one.
  selectors <- list(
    Carb_met = select_path_carb,
    Ener_met = select_path_en,
    Lip_met = select_path_lip,
    Amn_met = select_path_amn,
    Gly_bio_met = select_path_gly,
    Cof_vit_met = select_path_cofa,
    Transcript = select_path_transc,
    Transl = select_path_transl,
    Fold_degr = select_path_fold,
    Repl_repair = select_path_repl,
    sign_transd = select_path_sign,
    sign_mol_int = select_path_sign_mol,
    Transp_cat = select_path_transp_ca,
    cell_grow_d = select_path_cell_grow,
    cell_comm = select_path_cell_comm,
    imm_syst = select_path_imm_syst,
    end_syst = select_path_end_syst,
    circ_syst = select_path_circ_syst,
    dig_syst = select_path_dig_syst,
    exc_syst = select_path_exc_syst,
    nerv_syst = select_path_ner_syst,
    sens_syst = select_path_sens_syst
  )

  if (KEGG_path == "KEGG_path") {
    # All human pathways: element 1 holds the codes, element 2 a one-column
    # table of the pathway entries returned by keggList().
    pathways.list <- keggList("pathway", "hsa")
    pathway.codes <- sub("path:", "", names(pathways.list))
    pathways.list <- list(pathways.list)
    pathways.list <- pathways.list[lapply(pathways.list, length) != 0]
    path_info <- list(pathway.codes, do.call("cbind", pathways.list))
  } else if (KEGG_path %in% names(selectors)) {
    # proc_path() is defined elsewhere in the package; this function reads
    # element 1 as the pathway codes and element 2 as the pathway table.
    path_info <- proc_path(selectors[[KEGG_path]]())
  } else {
    stop(
      "unknown 'KEGG_path' value \"", KEGG_path, "\"; expected one of: ",
      paste(c(names(selectors), "KEGG_path"), collapse = ", "),
      call. = FALSE
    )
  }
  pathway.codes <- path_info[[1]]
  path_table <- path_info[[2]]

  # lapply() always yields a list; sapply() would collapse the result into a
  # matrix/vector whenever every pathway returns the same number of genes.
  genes.by.pathway <- lapply(pathway.codes, function(pwid) {
    pw <- keggGet(pwid)
    # Keep the odd-positioned entries of GENE.
    # NOTE(review): presumably the gene identifiers (ids and descriptions
    # alternate in the KEGGREST output) - confirm.
    pw[[1]]$GENE[c(TRUE, FALSE)]
  })
  if (is.null(names(genes.by.pathway))) {
    names(genes.by.pathway) <- as.character(pathway.codes)
  }

  x <- org.Hs.egSYMBOL2EG
  xx <- as.list(x[mappedkeys(x)])
  symbols <- names(xx)

  top3 <- matrix(" ", nrow = length(xx), ncol = length(genes.by.pathway))
  rownames(top3) <- symbols
  colnames(top3) <- names(genes.by.pathway)

  # Flatten the symbol -> identifier map once so that each pathway needs a
  # single %in% lookup instead of one intersect() per gene.
  flat_ids <- unlist(xx, use.names = FALSE)
  owner <- rep(seq_along(xx), lengths(xx))
  for (k in seq_along(genes.by.pathway)) {
    hits <- unique(owner[flat_ids %in% genes.by.pathway[[k]]])
    top3[hits, k] <- symbols[hits]
  }

  # Replace a column's pathway code by the matching entry of the pathway
  # table when the i-th code equals the i-th row name of that table.
  table_codes <- sub("path:", "", rownames(path_table))
  for (i in seq_len(ncol(top3))) {
    if (isTRUE(colnames(top3)[i] == table_codes[i])) {
      colnames(top3)[i] <- path_table[i]
    }
  }
  top3
}
|
165 |
- |
|
166 |
- |
|
167 |
#' @title Get network data.
#' @description getNETdata creates a data frame with network data.
#' Network category can be filtered among: physical interactions, co-localization, genetic interactions and shared protein domain.
#' @param network variable. The user can use the following parameters
#' based on the network types to be used. PHint for Physical_interactions,
#' COloc for Co-localization, GENint for Genetic_interactions and
#' SHpd for Shared_protein_domains
#' @param organism \code{NULL} (default) selects row 6 of the table returned by
#'   \code{SpidermiRquery_species}; any non-\code{NULL} value selects row 9.
#'   The original documentation states that the default is homo sapiens.
#' @export
#' @importFrom SpidermiR SpidermiRquery_species SpidermiRquery_spec_networks SpidermiRdownload_net SpidermiRprepare_NET
#' @return data frame with the unique gene-gene (or protein-protein)
#'   interaction pairs in columns \code{m_shar_pro} and \code{m2_shar_pro}
#' @examples
#' organism="Saccharomyces_cerevisiae"
#' netw<-getNETdata(network="SHpd",organism)
getNETdata <- function(network, organism = NULL) {
  # `species` is not defined in this function; it is handed over unevaluated.
  # NOTE(review): presumably SpidermiRquery_species() never reads its
  # argument - confirm.
  org_shar_pro <- SpidermiRquery_species(species)

  # NOTE(review): the value of `organism` is not inspected beyond NULL/non-NULL,
  # so every non-NULL organism maps to row 9 - confirm this is intended.
  organism_row <- if (is.null(organism)) 6 else 9
  organism_id <- org_shar_pro[organism_row, ]

  net_shar_prot <- SpidermiRquery_spec_networks(organismID = organism_id, network)
  out_net_shar_pro <- SpidermiRdownload_net(net_shar_prot)
  geneSymb_net_shar_pro <- SpidermiRprepare_NET(
    organismID = organism_id,
    data = out_net_shar_pro
  )

  ds_shar_pro <- do.call("rbind", geneSymb_net_shar_pro)
  data_shar_pro <- as.data.frame(
    ds_shar_pro[!duplicated(ds_shar_pro), , drop = FALSE]
  )

  # as.character() keeps the symbols themselves when the columns are factors;
  # c()/cbind() on factors would store the integer level codes instead.
  ss_shar_pro <- cbind(
    m_shar_pro = as.character(data_shar_pro$gene_symbolA),
    m2_shar_pro = as.character(data_shar_pro$gene_symbolB)
  )

  # drop = FALSE keeps two columns even when a single unique pair remains.
  data_pr_shar_pro <- as.data.frame(
    ss_shar_pro[!duplicated(ss_shar_pro), , drop = FALSE]
  )
  colnames(data_pr_shar_pro) <- c("m_shar_pro", "m2_shar_pro")
  data_pr_shar_pro
}
|
203 |
- |
|
204 |
- |
|
205 |
- |
|
206 |
- |
|
207 |
- |
208 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,525 +0,0 @@ |
1 |
- |
|
2 |
- |
|
3 |
- |
|
4 |
# Titles of the carbohydrate-metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Carbohydrate` argument is never read.
select_path_carb <- function(Carbohydrate) {
  titles <- c(
    "Glycolysis / Gluconeogenesis",
    "Citrate cycle (TCA cycle)",
    "Pentose phosphate pathway",
    "Pentose and glucuronate interconversions",
    "Fructose and mannose metabolism",
    "Galactose metabolism",
    "Ascorbate and aldarate metabolism",
    "Starch and sucrose metabolism",
    "Amino sugar and nucleotide sugar metabolism",
    "Pyruvate metabolism",
    "Glyoxylate and dicarboxylate metabolism",
    "Propanoate metabolism",
    "Butanoate metabolism",
    "C5-Branched dibasic acid metabolism",
    "Inositol phosphate metabolism",
    "Enzymes",
    "Compounds with biological roles"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
26 |
- |
|
27 |
# Titles of the energy-metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Energy` argument is never read.
select_path_en <- function(Energy) {
  titles <- c(
    "Oxidative phosphorylation",
    "Photosynthesis",
    "Photosynthesis - antenna proteins",
    "Carbon fixation in photosynthetic organisms",
    "Carbon fixation pathways in prokaryotes",
    "Methane metabolism",
    "Nitrogen metabolism",
    "Sulfur metabolism"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
40 |
- |
|
41 |
- |
|
42 |
# Titles of the lipid-metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Lipid` argument is never read.
select_path_lip <- function(Lipid) {
  titles <- c(
    "Fatty acid biosynthesis",
    "Fatty acid elongation",
    "Fatty acid degradation",
    "Synthesis and degradation of ketone bodies",
    "Cutin, suberine and wax biosynthesis",
    "Steroid biosynthesis",
    "Primary bile acid biosynthesis",
    "Secondary bile acid biosynthesis",
    "Steroid hormone biosynthesis",
    "Glycerolipid metabolism",
    "Glycerophospholipid metabolism",
    "Ether lipid metabolism",
    "Sphingolipid metabolism",
    "Arachidonic acid metabolism",
    "Linoleic acid metabolism",
    "alpha-Linolenic acid metabolism",
    "Biosynthesis of unsaturated fatty acids"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
65 |
- |
|
66 |
- |
|
67 |
- |
|
68 |
- |
|
69 |
# Titles of the amino-acid-metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Aminoacid` argument is never read.
select_path_amn <- function(Aminoacid) {
  titles <- c(
    "Alanine, aspartate and glutamate metabolism",
    "Glycine, serine and threonine metabolism",
    "Cysteine and methionine metabolism",
    "Valine, leucine and isoleucine degradation",
    "Valine, leucine and isoleucine biosynthesis",
    "Lysine biosynthesis",
    "Lysine degradation",
    "Arginine biosynthesis",
    "Arginine and proline metabolism",
    "Histidine metabolism",
    "Tyrosine metabolism",
    "Phenylalanine metabolism",
    "Tryptophan metabolism",
    "Phenylalanine, tyrosine and tryptophan biosynthesis",
    "beta-Alanine metabolism",
    "Taurine and hypotaurine metabolism",
    "Phosphonate and phosphinate metabolism",
    "Selenocompound metabolism",
    "Cyanoamino acid metabolism",
    "D-Glutamine and D-glutamate metabolism",
    "D-Arginine and D-ornithine metabolism",
    "D-Alanine metabolism",
    "Glutathione metabolism"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
98 |
- |
|
99 |
# Titles of the glycan biosynthesis/metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Glybio_met` argument is never read.
select_path_gly <- function(Glybio_met) {
  titles <- c(
    "N-Glycan biosynthesis",
    "Various types of N-glycan biosynthesis",
    "Mucin type O-Glycan biosynthesis",
    "Other types of O-glycan biosynthesis",
    "Glycosaminoglycan biosynthesis - CS/DS",
    "Glycosaminoglycan biosynthesis - HS/Hep",
    "Glycosaminoglycan biosynthesis - KS",
    "Glycosaminoglycan degradation",
    "Glycosylphosphatidylinositol(GPI)-anchor biosynthesis",
    "Glycosphingolipid biosynthesis - lacto and neolacto series",
    "Glycosphingolipid biosynthesis - globo series",
    "Glycosphingolipid biosynthesis - ganglio series",
    "Lipopolysaccharide biosynthesis",
    "Peptidoglycan biosynthesis",
    "Other glycan degradation"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
119 |
- |
|
120 |
- |
|
121 |
- |
|
122 |
# Titles of the cofactor/vitamin-metabolism group, each suffixed with
# "- Homo sapiens (human)". The `Cofa_vita_met` argument is never read.
select_path_cofa <- function(Cofa_vita_met) {
  titles <- c(
    "Thiamine metabolism",
    "Riboflavin metabolism",
    "Vitamin B6 metabolism",
    "Nicotinate and nicotinamide metabolism",
    "Pantothenate and CoA biosynthesis",
    "Biotin metabolism",
    "Lipoic acid metabolism",
    "Folate biosynthesis",
    "One carbon pool by folate",
    "Retinol metabolism",
    "Porphyrin and chlorophyll metabolism",
    "Ubiquinone and other terpenoid-quinone biosynthesis"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
139 |
- |
|
140 |
# Titles of the transcription group, each suffixed with
# "- Homo sapiens (human)". The `Transcription` argument is never read.
select_path_transc <- function(Transcription) {
  titles <- c(
    "RNA polymerase",
    "Basal transcription factors",
    "Spliceosome",
    "Transcription factors",
    "Transcription machinery"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
150 |
- |
|
151 |
- |
|
152 |
- |
|
153 |
# Titles of the translation group, each suffixed with
# "- Homo sapiens (human)". The `Translation` argument is never read.
select_path_transl <- function(Translation) {
  titles <- c(
    "Ribosome",
    "Aminoacyl-tRNA biosynthesis",
    "RNA transport",
    "mRNA surveillance pathway",
    "Ribosome biogenesis in eukaryotes",
    "Ribosomal proteins",
    "Ribosome biogenesis",
    "Transfer RNA biogenesis",
    "Translation factors"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
167 |
- |
|
168 |
# Titles of the folding/sorting/degradation group, each suffixed with
# "- Homo sapiens (human)". The argument is never read.
select_path_fold <- function(Folding_sorting_and_degradation) {
  titles <- c(
    "Protein export",
    "Protein processing in endoplasmic reticulum",
    "SNARE interactions in vesicular transport",
    "Ubiquitin mediated proteolysis",
    "Sulfur relay system",
    "RNA degradation",
    "Chaperones and folding catalysts",
    "SNAREs",
    "Ubiquitin system",
    "Proteasome"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
183 |
- |
|
184 |
- |
|
185 |
- |
|
186 |
- |
|
187 |
# Titles of the replication-and-repair group, each suffixed with
# "- Homo sapiens (human)". The argument is never read.
select_path_repl <- function(Replication_and_repair) {
  titles <- c(
    "DNA replication",
    "Base excision repair",
    "Nucleotide excision repair",
    "Mismatch repair",
    "Homologous recombination",
    "Non-homologous end-joining",
    "Fanconi anemia pathway",
    "DNA replication proteins",
    "Chromosome",
    # Previously two separate entries, "DNA repair and recombination" and
    # "proteins"; the bare "proteins" entry cannot name anything by itself.
    # NOTE(review): merged on the assumption that this is one KEGG title
    # split across lines - confirm against KEGG.
    "DNA repair and recombination proteins"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
203 |
- |
|
204 |
- |
|
205 |
- |
|
206 |
# Titles of the signal-transduction group, each suffixed with
# "- Homo sapiens (human)". The `Signal_transduction` argument is never read.
select_path_sign <- function(Signal_transduction) {
  titles <- c(
    "Ras signaling pathway",
    "Rap1 signaling pathway",
    "MAPK signaling pathway",
    "ErbB signaling pathway",
    "Wnt signaling pathway",
    "Notch signaling pathway",
    "Hedgehog signaling pathway",
    "TGF-beta signaling pathway",
    "Hippo signaling pathway",
    "VEGF signaling pathway",
    "Jak-STAT signaling pathway",
    "NF-kappa B signaling pathway",
    "TNF signaling pathway",
    "HIF-1 signaling pathway",
    "FoxO signaling pathway",
    "Calcium signaling pathway",
    "Phosphatidylinositol signaling system",
    "Phospholipase D signaling pathway",
    "Sphingolipid signaling pathway",
    "cAMP signaling pathway",
    "cGMP-PKG signaling pathway",
    "PI3K-Akt signaling pathway",
    "AMPK signaling pathway",
    "mTOR signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
235 |
- |
|
236 |
- |
|
237 |
# Titles of the signaling-molecules-and-interaction group, each suffixed with
# "- Homo sapiens (human)". The argument is never read.
select_path_sign_mol <- function(Signaling_molecules_and_interaction) {
  titles <- c(
    "Neuroactive ligand-receptor interaction",
    "Cytokine-cytokine receptor interaction",
    "ECM-receptor interaction",
    "Cell adhesion molecules (CAMs)"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
246 |
- |
|
247 |
- |
|
248 |
# Titles of the transport-and-catabolism group, each suffixed with
# "- Homo sapiens (human)". The argument is never read.
select_path_transp_ca <- function(Transport_and_catabolism) {
  titles <- c(
    "Endocytosis",
    "Phagosome",
    "Lysosome",
    "Peroxisome",
    "Regulation of autophagy"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
258 |
- |
|
259 |
# Titles of the cell-growth-and-death group, each suffixed with
# "- Homo sapiens (human)". The argument is never read.
select_path_cell_grow <- function(Cell_growth_and_death) {
  titles <- c(
    "Cell cycle",
    "Apoptosis",
    "p53 signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
267 |
- |
|
268 |
- |
|
269 |
# Titles of the cellular-community group, each suffixed with
# "- Homo sapiens (human)". The `Cellular_community` argument is never read.
select_path_cell_comm <- function(Cellular_community) {
  titles <- c(
    "Focal adhesion",
    "Adherens junction",
    "Tight junction",
    "Gap junction",
    # The original literal ended in a blank, which produced two spaces before
    # the species suffix and so differed from every other generated title.
    "Signaling pathways regulating pluripotency of stem cells"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
279 |
- |
|
280 |
- |
|
281 |
# Titles of the immune-system group, each suffixed with
# "- Homo sapiens (human)". The `Immune_system` argument is never read.
select_path_imm_syst <- function(Immune_system) {
  titles <- c(
    "Hematopoietic cell lineage",
    "Complement and coagulation cascades",
    "Platelet activation",
    "Toll-like receptor signaling pathway",
    "Toll and Imd signaling pathway",
    "NOD-like receptor signaling pathway",
    "RIG-I-like receptor signaling pathway",
    "Cytosolic DNA-sensing pathway",
    "Natural killer cell mediated cytotoxicity",
    "Antigen processing and presentation",
    "T cell receptor signaling pathway",
    "B cell receptor signaling pathway",
    "Fc epsilon RI signaling pathway",
    "Fc gamma R-mediated phagocytosis",
    "Leukocyte transendothelial migration",
    "Intestinal immune network for IgA production",
    "Chemokine signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
304 |
- |
|
305 |
- |
|
306 |
- |
|
307 |
- |
|
308 |
# Titles of the endocrine-system group, each suffixed with
# "- Homo sapiens (human)". The `Endocrine_system` argument is never read.
select_path_end_syst <- function(Endocrine_system) {
  titles <- c(
    "Insulin secretion",
    "Insulin signaling pathway",
    "Glucagon signaling pathway",
    "Regulation of lipolysis in adipocytes",
    "Adipocytokine signaling pathway",
    "PPAR signaling pathway",
    "GnRH signaling pathway",
    "Ovarian steroidogenesis",
    "Estrogen signaling pathway",
    "Progesterone-mediated oocyte maturation",
    "Prolactin signaling pathway",
    "Oxytocin signaling pathway",
    "Thyroid hormone synthesis",
    "Thyroid hormone signaling pathway",
    "Melanogenesis",
    "Renin secretion",
    "Renin-angiotensin system",
    "Aldosterone synthesis and secretion"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
333 |
- |
|
334 |
- |
|
335 |
# Titles of the circulatory-system group, each suffixed with
# "- Homo sapiens (human)". The `Circulatory_system` argument is never read.
select_path_circ_syst <- function(Circulatory_system) {
  titles <- c(
    "Cardiac muscle contraction",
    "Adrenergic signaling in cardiomyocytes",
    "Vascular smooth muscle contraction"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
343 |
- |
|
344 |
- |
|
345 |
# Titles of the digestive-system group, each suffixed with
# "- Homo sapiens (human)". The `Digestive_system` argument is never read.
select_path_dig_syst <- function(Digestive_system) {
  titles <- c(
    "Salivary secretion",
    "Gastric acid secretion",
    "Pancreatic secretion",
    "Bile secretion",
    "Carbohydrate digestion and absorption",
    "Protein digestion and absorption",
    "Fat digestion and absorption",
    "Vitamin digestion and absorption",
    "Mineral absorption"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
360 |
- |
|
361 |
- |
|
362 |
- |
|
363 |
# Titles of the excretory-system group, each suffixed with
# "- Homo sapiens (human)". The `Excretory_system` argument is never read.
select_path_exc_syst <- function(Excretory_system) {
  titles <- c(
    "Vasopressin-regulated water reabsorption",
    "Aldosterone-regulated sodium reabsorption",
    "Endocrine and other factor-regulated calcium reabsorption",
    "Proximal tubule bicarbonate reclamation",
    "Collecting duct acid secretion"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
375 |
- |
|
376 |
- |
|
377 |
# Titles of the nervous-system group, each suffixed with
# "- Homo sapiens (human)". The `Nervous_system` argument is never read.
select_path_ner_syst <- function(Nervous_system) {
  titles <- c(
    "Glutamatergic synapse",
    "GABAergic synapse",
    "Cholinergic synapse",
    "Dopaminergic synapse",
    "Serotonergic synapse",
    "Long-term potentiation",
    "Long-term depression",
    "Retrograde endocannabinoid signaling",
    "Synaptic vesicle cycle",
    "Neurotrophin signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
393 |
- |
|
394 |
- |
|
395 |
# Titles of the sensory-system group, each suffixed with
# "- Homo sapiens (human)". The `Sensory_system` argument is never read.
select_path_sens_syst <- function(Sensory_system) {
  titles <- c(
    "Phototransduction",
    "Olfactory transduction",
    "Taste transduction",
    "Inflammatory mediator regulation of TRP channels"
  )
  paste(titles, "- Homo sapiens (human)")
}
|
404 |
- |
|
405 |
- |
|
406 |
- |
|
407 |
#' @title Select the class of TCGA data
#' @description Keeps the columns of \code{Dataset} whose column name carries
#'   the requested sample-type code in characters 14-15 (read as a number).
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal).
#'   \code{"tumor"} keeps columns whose code equals 1, \code{"normal"} keeps
#'   columns whose code is 10 or greater; any other value returns
#'   \code{Dataset} unchanged.
#' @export
#' @return a gene expression matrix of the samples with specified label. The
#'   result keeps its two dimensions even when a single column (or none) is
#'   selected.
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_filt,typesample="tumor")[,2]
SelectedSample <- function(Dataset, typesample) {
  if (typesample == "tumor" || typesample == "normal") {
    sample_code <- as.numeric(substr(colnames(Dataset), 14, 15))
    # which() discards columns whose code could not be parsed (NA).
    keep <- if (typesample == "tumor") {
      which(sample_code == 1)
    } else {
      which(sample_code >= 10)
    }
    # drop = FALSE: a single matching sample must stay a one-column matrix
    # rather than collapse into a bare vector.
    Dataset <- Dataset[, keep, drop = FALSE]
  }
  Dataset
}
|
427 |
- |
|
428 |
- |
|
429 |
#' @title Select the AUC values above a cut-off
#' @description Stacks the AUC values with rbind, sorts them in decreasing
#'   order and keeps the entries that are strictly greater than the cut-off.
#' @param cutoff cut-off for AUC value
#' @param auc.df list of AUC value
#' @return a data frame with one row per selected entry: the first column is
#'   the logical result of the cut-off comparison, the second column (V2) the
#'   AUC value. It has zero rows when no AUC value exceeds the cut-off.
select_class <- function(auc.df, cutoff) {
  ds <- do.call("rbind", auc.df)
  tmp_ordered <- as.data.frame(ds[order(ds, decreasing = TRUE), ])
  colnames(tmp_ordered) <- "pathway"

  # Flag column first, then the AUC value itself as a second column.
  er <- as.data.frame(tmp_ordered$pathway > cutoff)
  rownames(er) <- rownames(tmp_ordered)
  er[, 2] <- tmp_ordered$pathway

  # Values are sorted in decreasing order, so the entries above the cut-off
  # are the leading rows. seq_len() yields no rows when nothing passes,
  # whereas 1:0 would wrongly select the first row.
  n_selected <- length(tmp_ordered[tmp_ordered$pathway > cutoff, ])
  er[seq_len(n_selected), ]
}
|
445 |
- |
|
446 |
- |
|
447 |
- |
|
448 |
- |
|
449 |
#' @title Process matrix TCGA data after the selection of pairwise pathway
#' @description Converts the score columns (third column onwards) to numeric,
#'   labels every row with the cleaned names of its pathway pair and keeps
#'   the rows whose label is also a row name of \code{list_perf}.
#' @param measure matrix with measure of cross-talk among pathways
#' @param list_perf output of the function select_class
#' @return the numeric score rows of the pathway pairs found in list_perf
process_matrix <- function(measure, list_perf) {
  # Reduce a pathway name to a compact label: spaces become underscores, the
  # species text is removed, then every underscore and hyphen is dropped.
  compact_label <- function(raw_names, species_regex) {
    label <- gsub(" ", "_", raw_names)
    label <- sub(species_regex, "", label)
    label <- gsub("(human)", "", label, fixed = TRUE)
    label <- gsub("_", "", label)
    gsub("-", "", label)
  }

  score_cols <- as.data.frame(measure[, 3:ncol(measure)])
  score_cols[] <- lapply(score_cols, function(column) {
    as.numeric(as.character(column))
  })

  first_label <- compact_label(measure[, 1], '_-_Homo_sapiens_*')
  second_label <- compact_label(measure[, 2], '_-_Homo_sapiens_(human)*')
  rownames(score_cols) <- paste(first_label, second_label, sep = "_")

  selected_pairs <- intersect(rownames(score_cols), rownames(list_perf))
  score_cols[selected_pairs, ]
}
|
475 |
- |
|
476 |
- |
|
477 |
- |
|
478 |
# Convert the score columns (third column onwards) of a pairwise pathway
# table to numeric and label every row with the cleaned names of its pair.
#
# measure_cell_process: table whose first two columns hold pathway names and
#   whose remaining columns hold the scores.
# Returns a data frame of numeric scores with "<pathway1>_<pathway2>" row names.
process_matrix_cell_process <- function(measure_cell_process) {
  # Reduce a pathway name to a compact label: spaces become underscores, the
  # species text is removed, then every underscore and hyphen is dropped.
  compact_label <- function(raw_names, species_regex) {
    label <- gsub(" ", "_", raw_names)
    label <- sub(species_regex, "", label)
    label <- gsub("(human)", "", label, fixed = TRUE)
    label <- gsub("_", "", label)
    gsub("-", "", label)
  }

  last_col <- ncol(measure_cell_process)
  score_cols <- as.data.frame(measure_cell_process[, 3:last_col])
  score_cols[] <- lapply(score_cols, function(column) {
    as.numeric(as.character(column))
  })

  first_label <- compact_label(measure_cell_process[, 1], '_-_Homo_sapiens_*')
  second_label <- compact_label(measure_cell_process[, 2],
                                '_-_Homo_sapiens_(human)*')
  rownames(score_cols) <- paste(first_label, second_label, sep = "_")
  score_cols
}
|
501 |
- |
|
502 |
- |
|
503 |
#' @title Select human KEGG pathways by name.
#' @description proc_path retrieves the list of human KEGG pathways with
#'   keggList and keeps the entries whose description appears in \code{mer}.
#' @param mer output for example of select_path_carb
#' @export
#' @importFrom KEGGREST keggList
#' @return a list of two elements: the identifiers of the selected pathways
#'   (with any "path:" prefix removed) and a one-column matrix holding their
#'   descriptions. With no match the second element is NULL.
proc_path <- function(mer) {
  pathways.list <- keggList("pathway", "hsa") ## returns the list of human pathways
  # Vectorised membership test; it also copes with an empty pathway list,
  # which the former 1:length() loop could not.
  selected <- as.list(pathways.list[pathways.list %in% mer])
  pathway.codes <- sub("path:", "", names(selected))
  b <- do.call("rbind", selected)
  list(pathway.codes, b)
}
|
525 |
- |
526 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,500 +0,0 @@ |
1 |
#' @title Get human KEGG pathway data and network data in order to define the common gene.
#' @description path_net creates a list of network data for each human pathway. The network data will be generated when interacting genes belong to that pathway.
#' @param data network data as provided by getNETdata
#' @param pathway pathway data as provided by getKEGGdata
#' @importFrom igraph graph.data.frame induced.subgraph get.data.frame
#' @export
#' @return a list of network data for each pathway (interacting genes belong to that pathway)
#' @examples
#' lista_net<-path_net(pathway=path,data=netw)
path_net <- function(pathway, data) {
  # The network does not change between pathways, so its clean-up and the
  # graph are prepared once instead of on every iteration.
  colnames(data) <- c("gene_symbolA", "gene_symbolB")
  is_factor_col <- vapply(data, is.factor, logical(1))
  data[is_factor_col] <- lapply(data[is_factor_col], as.character)
  network_genes <- unique(unlist(data))
  g <- graph.data.frame(data, directed = FALSE)

  lista_int <- list()
  for (k in seq_len(ncol(pathway))) {
    print(colnames(pathway)[k])
    # Genes of the current pathway that are also nodes of the network.
    s <- intersect(network_genes, pathway[, k])
    # Edges of the network whose two end points both belong to the pathway.
    aaa <- get.data.frame(induced.subgraph(graph = g, vids = s))
    colnames(aaa)[1] <- "V1"
    colnames(aaa)[2] <- "V2"
    lista_int[[k]] <- aaa
    names(lista_int)[k] <- colnames(pathway)[k]
  }
  lista_int
}
|
31 |
- |
|
32 |
- |
|
33 |
- |
|
34 |
- |
|
35 |
#' @title Get human KEGG pathway data and output of path_net in order to define the common genes.
#' @description list_path_net creates a list of interacting genes for each human pathway.
#' @param lista_net output of path_net
#' @param pathway pathway data as provided by getKEGGdata
#' @export
#' @return a list of genes for each pathway (interacting genes belong to that pathway)
#' @examples
#' lista_netw<-path_net(pathway=path,data=netw)
#' list_path<-list_path_net(lista_net=lista_netw,pathway=path)
list_path_net <- function(lista_net, pathway) {
  v <- list()
  # seq_along() so that an empty lista_net yields an empty result.
  for (j in seq_along(lista_net)) {
    cf <- lista_net[[j]]
    is_factor_col <- vapply(cf, is.factor, logical(1))
    cf[is_factor_col] <- lapply(cf[is_factor_col], as.character)
    colnames(cf) <- c("m_shar_pro", "m2_shar_pro")

    # Every distinct gene that appears at either end of an interaction.
    fr <- unique(c(cf$m_shar_pro, cf$m2_shar_pro))
    aa <- intersect(fr, pathway[, j])

    # Keep the pathway only when all of its network genes belong to it.
    if (length(aa) == length(fr)) {
      print(paste("List of genes interacting in the same pathway:", colnames(pathway)[j]))
      v[[j]] <- aa
      names(v)[j] <- colnames(pathway)[j]
    }
  }
  v
}
|
73 |
- |
|
74 |
- |
|
75 |
- |
|
76 |
- |
|
77 |
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input .
#' @description GE_matrix creates a matrix of gene expression for pathways given by the user.
#' @param DataMatrix gene expression matrix (eg.TCGA data)
#' @param pathway pathway data as provided by getKEGGdata
#' @export
#' @return a matrix for each pathway ( gene expression level belong to that pathway)
#' @examples
#' list_path_gene<-GE_matrix(DataMatrix=tumo[,1:2],pathway=path)
GE_matrix <- function(DataMatrix, pathway) {
  raw_names <- colnames(pathway)
  # NOTE(review): sub() replaces only the first space and runs before the
  # species suffix is stripped, so a name with no space ahead of the suffix
  # keeps it. Left as is because callers may rely on the resulting names.
  path_name <- sub(" ", "_", raw_names)
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)

  zz <- as.data.frame(DataMatrix)
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(rownames(zz), pathway[, k])
    if (length(common_genes) != 0) {
      # Print the name as received in `pathway`; this used to read a global
      # object called `path`, which fails when no such object exists.
      print(raw_names[k])
      common_expr <- as.data.frame(zz[common_genes, ])
      rownames(common_expr) <- common_genes
      v[[k]] <- common_expr
      names(v)[k] <- colnames(pathway)[k]
    }
  }
  v
}
|
112 |
- |
|
113 |
- |
|
114 |
- |
|
115 |
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the mean gene expression for only pathways given in input .
#' @description matrix_plot creates a matrix of mean gene expression for pathways given by the user.
#' @param DataMatrix gene expression matrix (eg.TCGA data)
#' @param pathway pathway data as provided by getKEGGdata
#' @export
#' @return a matrix for each pathway (mean gene expression level belong to that pathway)
#' @examples
#' list_path_plot<-matrix_plot(DataMatrix=tumo[,1:2],pathway=path)
matrix_plot <- function(DataMatrix, pathway) {
  raw_names <- colnames(pathway)
  path_name <- sub(" ", "_", raw_names)
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)

  # Mean expression of every gene across the samples.
  zz <- as.data.frame(rowMeans(DataMatrix))

  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(rownames(zz), pathway[, k])
    if (length(common_genes) != 0) {
      # Print the name as received in `pathway`; this used to read a global
      # object called `path`, which fails when no such object exists.
      print(raw_names[k])
      v[[k]] <- common_genes
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  # Genes x pathways: the gene mean where the gene belongs to the pathway,
  # zero elsewhere.
  PEAmatrix <- matrix(0, nrow(DataMatrix), ncol(pathway))
  rownames(PEAmatrix) <- rownames(DataMatrix)
  colnames(PEAmatrix) <- colnames(pathway)
  for (i in seq_along(v)) {
    PEAmatrix[v[[i]], i] <- zz[v[[i]], ]
  }
  # Only rows with a strictly positive total are returned.
  PEAmatrix[which(rowSums(PEAmatrix) > 0), ]
}
|
149 |
- |
|
150 |
- |
|
151 |
- |
|
152 |
- |
|
153 |
- |
|
154 |
- |
|
155 |
- |
|
156 |
- |
|
157 |
- |
|
158 |
- |
|
159 |
- |
|
160 |
- |
|
161 |
- |
|
162 |
#' @title Get human KEGG pathway data and a gene expression matrix we obtain a matrix with the gene expression for only pathways given in input .
#' @description plotting_cross_talk computes the correlation between the rows
#'   of path_matrix and, for each pathway, the positions of its genes in that
#'   correlation matrix.
#' @param DataMatrix gene expression matrix (eg.TCGA data)
#' @param pathway pathway data as provided by getKEGGdata
#' @param path_matrix output of the function matrix_plot
#' @export
#' @return a list with the correlation matrix (corr) and, for each pathway,
#'   the row indices of its genes in that matrix (gruppi)
#' @examples
#' mt<-plotting_cross_talk(DataMatrix=tumo[,1:2],pathway=path,path_matrix=list_path_plot)
plotting_cross_talk <- function(DataMatrix, pathway, path_matrix) {
  raw_names <- colnames(pathway)
  # Rename once, before the loop. Doing it on every iteration replaced one
  # more space each time, so the names depended on the number of pathways.
  path_name <- sub(" ", "_", raw_names)
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)

  gene_ids <- rownames(as.data.frame(rowMeans(DataMatrix)))
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(gene_ids, pathway[, k])
    if (length(common_genes) != 0) {
      # Print the name as received in `pathway`; this used to read a global
      # object called `path`, which fails when no such object exists.
      print(raw_names[k])
      v[[k]] <- common_genes
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  # Correlation between the rows of path_matrix.
  dc <- cor(t(path_matrix))

  vv <- list()
  for (k in seq_along(v)) {
    in_corr <- intersect(rownames(dc), v[[k]])
    vv[[k]] <- match(in_corr, rownames(dc))
    names(vv)[k] <- colnames(pathway)[k]
  }
  list(corr = dc, gruppi = vv)
}
|
201 |
- |
|
202 |
- |
|
203 |
- |
|
204 |
- |
|
205 |
#' @title For TCGA data get human pathway data and creates a matrix with the average of genes for each pathway.
#' @description average creates a matrix with a summarized value for each pathway
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix value for each pathway
#' @examples
#' score_mean<-average(dataFilt=tumo[,1:2],path)
average <- function(dataFilt, pathway) {
  DataMatrix <- dataFilt

  # One row per pathway, one column per sample.
  PEAmatrix <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix) <- colnames(pathway)
  colnames(PEAmatrix) <- colnames(DataMatrix)

  for (k in seq_len(nrow(PEAmatrix))) {
    common_genes <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE keeps a matrix when a single gene (or a single sample) is
    # selected; colSums() fails on the bare vector produced otherwise.
    common_expr <- DataMatrix[common_genes, , drop = FALSE]
    # A pathway with no gene in common gives 0 / 0, i.e. NaN, for every sample.
    PEAmatrix[k, ] <- colSums(common_expr) / length(common_genes)
  }
  PEAmatrix
}
|
236 |
- |
|
237 |
- |
|
238 |
- |
|
239 |
- |
|
240 |
- |
|
241 |
- |
|
242 |
- |
|
243 |
- |
|
244 |
- |
|
245 |
- |
|
246 |
- |
|
247 |
- |
|
248 |
#' @title For TCGA data get human pathway data and creates a measure of cross-talk among pathways
#' @description euc_dist_crtlk creates a matrix with euclidean distance for pairwise pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names, the remaining columns the distance in each sample
#' @examples
#' score_euc_dista<-euc_dist_crtlk(dataFilt=tumo[,1:2],path)
euc_dist_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)

  # Every possible pair of pathways, one pair per row.
  df <- t(combn(rownames(PEAmatrix), 2))

  # Matrix that will hold the distances; its columns carry the patient names.
  ma_d <- matrix(0, nrow(df), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)

  # For each patient, the Euclidean distance between the pathway values of
  # every pair. seq_len() skips the loop when there is no sample column.
  for (p in seq_len(ncol(PEAmatrix))) {
    ma_d[, p] <- dist(PEAmatrix[, p])
  }

  # Prepend the labels of the pathway pairs.
  cbind(df, ma_d)
}
|
272 |
- |
|
273 |
- |
|
274 |
- |
|
275 |
- |
|
276 |
#' @title For TCGA data get human pathway data and creates a measure of standard deviations among pathways
#' @description st_dv creates a matrix with standard deviation for pathways
#' @param DataMatrix TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix value for each pathway
#' @examples
#' stand_dev<-st_dv(DataMatrix=tumo[,1:2],pathway=path)
st_dv <- function(DataMatrix, pathway) {
  # One row per pathway, one column per sample.
  PEAmatrix_sd <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix_sd) <- colnames(pathway)
  colnames(PEAmatrix_sd) <- colnames(DataMatrix)

  for (k in seq_len(nrow(PEAmatrix_sd))) {
    print(colnames(pathway)[k])
    common_genes <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE keeps a matrix when a single gene (or a single sample) is
    # selected; apply() fails on the bare vector produced otherwise.
    common_expr <- DataMatrix[common_genes, , drop = FALSE]
    # Standard deviation of the pathway genes in each sample; it is NA when
    # fewer than two genes are shared.
    PEAmatrix_sd[k, ] <- apply(common_expr, 2, sd)
  }
  PEAmatrix_sd
}
|
306 |
- |
|
307 |
- |
|
308 |
- |
|
309 |
- |
|
310 |
- |
|
311 |
- |
|
312 |
#' @title For TCGA data get human pathway data and creates a measure of discriminating score among pathways
#' @description ds_score_crtlk creates a matrix with discriminating score for pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names, the remaining columns the score in each sample
#' @examples
#' cross_talk_st_dv<-ds_score_crtlk(dataFilt=tumo[,1:2],pathway=path)
ds_score_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)

  # Matrix that will hold the distances; its columns carry the patient names.
  n_pairs <- ncol(combn(rownames(PEAmatrix), 2))
  ma_d <- matrix(0, n_pairs, ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  # For each patient, the Euclidean distance between the pathway means of
  # every pair.
  for (p in seq_len(ncol(PEAmatrix))) {
    ma_d[, p] <- dist(PEAmatrix[, p])
  }

  PEAmatrix_sd <- st_dv(dataFilt, pathway)
  # Every possible pair of pathways, one pair per row.
  df <- t(combn(rownames(PEAmatrix_sd), 2))

  # Matrix that will hold the summed standard deviations, one column per
  # patient.
  ma <- matrix(0, nrow(df), ncol(PEAmatrix_sd))
  colnames(ma) <- colnames(PEAmatrix_sd)
  for (p in seq_len(ncol(PEAmatrix_sd))) {
    patients <- PEAmatrix_sd[, p]
    # Sum of the two standard deviations of each pair, looked up by name.
    ma[, p] <- apply(df, 1, function(x) sum(patients[x]))
  }

  # Discriminating score: |M1 - M2| / (S1 + S2).
  score <- ma_d / ma
  cbind(df, score)
}
|
347 |
- |
|
348 |
- |
|
349 |
- |
|
350 |
-#' @title SVM classification for each feature |
|
351 |
-#' @description svm class creates a list with auc value |
|
352 |
-#' @param TCGA_matrix gene expression matrix |
|
353 |
-#' @param nfs nfs split data into a training and test set |
|
354 |
-#' @param tumour barcode samples for a class |
|
355 |
-#' @param normal barcode samples for another class |
|
356 |
-#' @export |
|
357 |
-#' @importFrom e1071 tune svm |
|
358 |
-#' @importFrom ROCR prediction performance |
|
359 |
-#' @importFrom grDevices rainbow |
|
360 |
-#' @return a list with AUC value for pairwise pathway |
|
361 |
-#' @examples |
|
362 |
-#' nf <- 60 |
|
363 |
-#' res_class<-svm_classification(TCGA_matrix=score_euc_dist,nfs=nf, |
|
364 |
-#' normal=colnames(norm[,1:10]),tumour=colnames(tumo[,1:10])) |
|
365 |
-svm_classification<-function(TCGA_matrix,tumour,normal,nfs){ |
|
366 |
- #library("e1071") |
|
367 |
- #library(ROCR) |
|
368 |
- |
|
369 |
- scoreMatrix <- as.data.frame(TCGA_matrix[,3:ncol(TCGA_matrix)]) |
|
370 |
- scoreMatrix <-as.data.frame(scoreMatrix) |
|
371 |
- for( i in 1: ncol(scoreMatrix)){ |
|
372 |
- scoreMatrix[,i] <- as.numeric(as.character(scoreMatrix[,i])) |
|
373 |
- } |
|
374 |
- |
|
375 |
- TCGA_matrix[,1] <- gsub(" ", "_", TCGA_matrix[,1]) |
|
376 |
- d<-sub('_-_Homo_sapiens_*', '', TCGA_matrix[,1]) |
|
377 |
- #d_pr<-sub(')*', '', DataMatrix[,1]) |
|
378 |
- |
|
379 |
- d_pr<- gsub("(human)", "", d, fixed="TRUE") |
|
380 |
- d_pr <- gsub("_", "", d_pr) |
|
381 |
- d_pr <- gsub("-", "", d_pr) |
|
382 |
- |
|
383 |
- TCGA_matrix[,2] <- gsub(" ", "_", TCGA_matrix[,2]) |
|
384 |
- d2<-sub('_-_Homo_sapiens_(human)*', '', TCGA_matrix[,2]) |
|
385 |
- d_pr2<- gsub("(human)", "", d2, fixed="TRUE") |
|
386 |
- d_pr2 <- gsub("_", "", d_pr2) |
|
387 |
- d_pr2 <- gsub("-", "", d_pr2) |
|
388 |
- |
|
389 |
- PathwaysPair <- paste( as.matrix(d_pr), as.matrix(d_pr2),sep="_" ) |
|
390 |
- |
|
391 |
- rownames(scoreMatrix) <-PathwaysPair |
|
392 |
- |
|
393 |
- |
|
394 |
- tDataMatrix<-as.data.frame(t(scoreMatrix)) |
|
395 |
- #tDataMatrix$Target[,1]<-0 |
|
396 |
- |
|
397 |
- tDataMatrix<-cbind(Target=0,tDataMatrix ) |
|
398 |
- |
|
399 |
- tum<-intersect(rownames(tDataMatrix),tumour) |
|
400 |
- nor<-intersect(rownames(tDataMatrix),normal) |
|
401 |
- #tDataMatrix$ |
|
402 |
- |
|
403 |
- Dataset_g1<-tDataMatrix[nor,] |
|
404 |
- Dataset_g3<- tDataMatrix[tum,] |
|
405 |
- |
|
406 |
- |
|
407 |
-#training=read.table('C:/Users/UserInLab05/Desktop/trai.txt',header = TRUE) |
|
408 |
-#testset=read.table('C:/Users/UserInLab05/Desktop/test.txt',header = TRUE) |
|
409 |
- |
|
410 |
- Dataset_g1$Target <- 0 |
|
411 |
- Dataset_g3$Target<-1 |
|
412 |
-#Dataset_g3 <- Dataset_g3[Dataset_g3$Target <- 1, ] |
|
413 |
- |
|
414 |
-tab_g1_training <- sample(rownames(Dataset_g1),round(nrow(Dataset_g1) / 100 * nfs )) |
|
415 |
-tab_g3_training <- sample(rownames(Dataset_g3),round(nrow(Dataset_g3) / 100 * nfs )) |
|
416 |
-tab_g1_testing <- setdiff(rownames(Dataset_g1),tab_g1_training) |
|
417 |
-tab_g3_testing <- setdiff(rownames(Dataset_g3),tab_g3_training) |
|
418 |
- |
|
419 |
-FR<-intersect(rownames(Dataset_g1),tab_g1_training) |
|
420 |
- |
|
421 |
-#rownames(Dataset_g1)<-Dataset_g1[,1] |
|
422 |
-G1<-Dataset_g1[FR,] |
|
423 |
- |
|
424 |
-FR1<-intersect(rownames(Dataset_g3),tab_g3_training) |
|
425 |
-#rownames(Dataset_g3)<-Dataset_g3$ID |
|
426 |
- |
|
427 |
-G3<-Dataset_g3[FR1,] |
|
428 |
-training<-rbind(G1,G3) |
|
429 |
- |
|
430 |
-inter1<-intersect(rownames(Dataset_g1),tab_g1_testing) |
|
431 |
-#rownames(Dataset_g1)<-Dataset_g1$ID |
|
432 |
- |
|
433 |
-G1_testing<-Dataset_g1[inter1,] |
|
434 |
- |
|
435 |
-inter2<-intersect(rownames(Dataset_g3),tab_g3_testing) |
|
436 |
-#rownames(Dataset_g3)<-Dataset_g3$ID |
|
437 |
-G3_testing<-Dataset_g3[inter2,] |
|
438 |
- |
|
439 |
-testing<-rbind(G1_testing,G3_testing) |
|
440 |
- |
|
441 |
-x <- subset(training, select=-Target) |
|
442 |
-y <- training$Target |
|
443 |
-#testing[,2]<-NULL |
|
444 |
-z<-subset(testing, select=-Target) |
|
445 |
- |
|
446 |
-zi<-testing$Target |
|
447 |
- |
|
448 |
-auc.df<-list() |
|
449 |
-svm_model_after_tune_COMPL<-list() |
|
450 |
-for( k in 2: ncol(training)){ |
|
451 |
- print(colnames(training)[k]) |
|
452 |
- svm_tune <- tune(svm, train.x=x, train.y=y, |
|
453 |
- kernel="radial", ranges=list(cost=10^(-1:2), gamma=c(.5,1,2)),cross=10) |
|
454 |
- #print(svm_tune) |
|
455 |
- |
|
456 |
- svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters$cost, gamma=svm_tune$best.parameters$gamma,cross=10,probability = TRUE) |
|
457 |
- |
|
458 |
- |
|
459 |
- #svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters[1], gamma=svm_tune$best.parameters[2],cross=10,probability = TRUE) |
|
460 |
- #summary(svm_model_after_tune) |
|
461 |
- |
|
462 |
- j=k-1 |
|
463 |
- z2=z[,j] |
|
464 |
- z3<-as.data.frame(z2) |
|
465 |
- #rownames(z3)<-rownames(z) |
|
466 |
- #colnames(z3)<-as.character(paste("X",j,sep = "")) |
|
467 |
- colnames(z3)<-colnames(z)[j] |
|
468 |
- #classifiersMatrix <- c(classifiersMatrix,svm_model_after_tune) |
|
469 |
- pred <- predict(svm_model_after_tune,z3,decision.values=TRUE,cross=10) |
|
470 |
- |
|
471 |
- #a<-table(pred,zi) |
|
472 |
- svm.roc <- prediction(attributes(pred)$decision.values, zi) |
|
473 |
- svm.auc <- performance(svm.roc, 'tpr', 'fpr') |
|
474 |
- |
|
475 |
- perf <- performance(svm.roc, "auc") |
|
476 |
- auc<-perf@y.values[[1]] |
|
477 |
- |
|
478 |
- auc.df[[j]]<- auc |
|
479 |
- svm_model_after_tune_COMPL[[j]]<-svm_model_after_tune |
|
480 |
- |
|
481 |
- palette <- as.matrix(rainbow(ncol(z))) |
|
482 |
- #print(j) |
|
483 |
- if (j >1 & j < 6) { |
|
484 |
- plot(svm.auc,col=palette[j], add=TRUE) |
|
485 |
- legend('bottomright', colnames(z), |
|
486 |
- lty=1, col=palette, bty='n', cex=.90,pch = 20,ncol=1) |
|
487 |
- |
|
488 |
- |
|
489 |
- } |
|
490 |
- else { |
|
491 |
- plot(svm.auc, col=palette[j]) |
|
492 |
- |
|
493 |
- |
|
494 |
- } |
|
495 |