claudiacava authored on 16/10/2017 14:36:16
Showing 45 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,41 @@
1
+Package: StarBioTrek
2
+Type: Package
3
+Title: StarBioTrek
4
+Version: 1.3.2
5
+Date: 10-16-2017
6
+Author: Claudia Cava,
7
+    Isabella Castiglioni
8
+Maintainer: Claudia Cava <claudia.cava@ibfm.cnr.it>
9
+Depends:
10
+    R (>= 3.3)
11
+Imports:
12
+    SpidermiR,
13
+	KEGGREST,
14
+	org.Hs.eg.db,
15
+	AnnotationDbi,
16
+	e1071,
17
+	ROCR,
18
+	grDevices,
19
+	igraph
20
+Description: StarBioTrek provides methods to measure pathway activity and cross-talk among pathways, also integrating information from network data.
21
+License: GPL (>= 3)
22
+biocViews: GeneRegulation,
23
+    Network,
24
+	Pathways,
25
+	KEGG
26
+Suggests:
27
+    BiocStyle,
28
+    knitr,
29
+    rmarkdown,
30
+    testthat,
31
+	devtools,
32
+	roxygen2,
33
+	qgraph,
34
+	png,
35
+	grid
36
+VignetteBuilder: knitr
37
+LazyData: true
38
+URL: https://github.com/claudiacava/StarBioTrek
39
+BugReports: https://github.com/claudiacava/StarBioTrek/issues
40
+RoxygenNote: 6.0.1
41
+
0 42
new file mode 100644
... ...
@@ -0,0 +1,35 @@
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(GE_matrix)
4
+export(IPPI)
5
+export(SelectedSample)
6
+export(average)
7
+export(ds_score_crtlk)
8
+export(euc_dist_crtlk)
9
+export(getKEGGdata)
10
+export(getNETdata)
11
+export(list_path_net)
12
+export(matrix_plot)
13
+export(path_net)
14
+export(plotting_cross_talk)
15
+export(proc_path)
16
+export(st_dv)
17
+export(svm_classification)
18
+importFrom(AnnotationDbi,as.list)
19
+importFrom(AnnotationDbi,mappedkeys)
20
+importFrom(KEGGREST,keggGet)
21
+importFrom(KEGGREST,keggList)
22
+importFrom(ROCR,performance)
23
+importFrom(ROCR,prediction)
24
+importFrom(SpidermiR,SpidermiRanalyze_degree_centrality)
25
+importFrom(SpidermiR,SpidermiRdownload_net)
26
+importFrom(SpidermiR,SpidermiRprepare_NET)
27
+importFrom(SpidermiR,SpidermiRquery_spec_networks)
28
+importFrom(SpidermiR,SpidermiRquery_species)
29
+importFrom(e1071,svm)
30
+importFrom(e1071,tune)
31
+importFrom(grDevices,rainbow)
32
+importFrom(igraph,get.data.frame)
33
+importFrom(igraph,graph.data.frame)
34
+importFrom(igraph,induced.subgraph)
35
+importFrom(org.Hs.eg.db,org.Hs.egSYMBOL2EG)
0 36
new file mode 100644
... ...
@@ -0,0 +1,6 @@
1
+  StarBioTrek 
2
+----------------------------------------------------------------
3
+  FIRST VERSION - FEATURES
4
+
5
+* getKEGGdata	Searching by KEGG data.
6
+* getNETdata	Searching by network data.
0 7
new file mode 100644
... ...
@@ -0,0 +1,64 @@
1
+#' Download data
2
+#'
3
+#' StarBioTrek allows you to download sample data from StarBioTrek.
4
+#'
5
+#' The function you're most likely to need from \pkg{StarBioTrek} is
6
+#' \code{path_star}
7
+#' Otherwise, refer to the vignettes to see
8
+#' how to format the documentation.
9
+#'
10
+#' @docType package
11
+#' @name StarBioTrek
12
+NULL
13
+
14
+#' Pathway data from KEGG
15
+#' @docType data
16
+#' @keywords internal
17
+#' @name path
18
+#' @format A data frame with rows and  variables
19
+NULL
20
+
21
+#' network data
22
+#' @docType data
23
+#' @keywords internal
24
+#' @name netw
25
+#' @format A data frame with  rows and variables
26
+NULL
27
+
28
+
29
+
30
+
31
+#' TCGA data
32
+#' @docType data
33
+#' @keywords internal
34
+#' @name Data_CANCER_normUQ_filt
35
+#' @format A data frame with rows and variables
36
+NULL
37
+
38
+#' Score Matrix of pairwise pathway using euclidean distance
39
+#' @docType data
40
+#' @keywords internal
41
+#' @name score_euc_dist
42
+#' @format A data frame with rows and variables
43
+NULL
44
+
45
+#' TCGA data with normal samples
46
+#' @docType data
47
+#' @keywords internal
48
+#' @name norm
49
+#' @format A data frame with rows and variables
50
+NULL
51
+
52
+#' TCGA data with tumour samples
53
+#' @docType data
54
+#' @keywords internal
55
+#' @name tumo
56
+#' @format A data frame with rows and variables
57
+NULL
58
+
59
+#' A matrix of gene expression for pathways given by the user. 
60
+#' @docType data
61
+#' @keywords internal
62
+#' @name list_path_plot
63
+#' @format A data frame with rows and variables
64
+NULL
0 65
\ No newline at end of file
1 66
new file mode 100644
... ...
@@ -0,0 +1,207 @@
1
#' @title Get human KEGG pathway data.
#' @description getKEGGdata creates a character matrix of human KEGG pathways.
#' Columns are the pathways and rows are the gene symbols mapped in
#' org.Hs.egSYMBOL2EG. A cell holds the gene symbol when the gene belongs to
#' the pathway and " " otherwise.
#' @param KEGG_path  single character string naming the pathway group to
#' retrieve. One of "Carb_met", "Ener_met", "Lip_met", "Amn_met",
#' "Gly_bio_met", "Cof_vit_met", "Transcript", "Transl", "Fold_degr",
#' "Repl_repair", "sign_transd", "sign_mol_int", "Transp_cat", "cell_grow_d",
#' "cell_comm", "imm_syst", "end_syst", "circ_syst", "dig_syst", "exc_syst",
#' "nerv_syst", "sens_syst", or "KEGG_path" to use every human pathway
#' returned by KEGG.
#' @export
#' @importFrom KEGGREST keggList keggGet
#' @importFrom org.Hs.eg.db org.Hs.egSYMBOL2EG
#' @importFrom AnnotationDbi mappedkeys as.list
#' @return character matrix with human pathway data (gene symbols in rows,
#' pathways in columns)
#' @examples
#' path<-getKEGGdata(KEGG_path="Transcript")
getKEGGdata <- function(KEGG_path) {
  if (!is.character(KEGG_path) || length(KEGG_path) != 1L || is.na(KEGG_path)) {
    stop("KEGG_path must be a single character string", call. = FALSE)
  }

  if (KEGG_path == "KEGG_path") {
    # Use every human pathway known to KEGG.
    all_pathways <- keggList("pathway", "hsa")
    pathway.codes <- sub("path:", "", names(all_pathways))
    a <- matrix(all_pathways, ncol = 1,
                dimnames = list(names(all_pathways), NULL))
  } else {
    # Dispatch table: group key -> helper returning the pathway titles.
    selectors <- list(
      Carb_met = select_path_carb,
      Ener_met = select_path_en,
      Lip_met = select_path_lip,
      Amn_met = select_path_amn,
      Gly_bio_met = select_path_gly,
      Cof_vit_met = select_path_cofa,
      Transcript = select_path_transc,
      Transl = select_path_transl,
      Fold_degr = select_path_fold,
      Repl_repair = select_path_repl,
      sign_transd = select_path_sign,
      sign_mol_int = select_path_sign_mol,
      Transp_cat = select_path_transp_ca,
      cell_grow_d = select_path_cell_grow,
      cell_comm = select_path_cell_comm,
      imm_syst = select_path_imm_syst,
      end_syst = select_path_end_syst,
      circ_syst = select_path_circ_syst,
      dig_syst = select_path_dig_syst,
      exc_syst = select_path_exc_syst,
      nerv_syst = select_path_ner_syst,
      sens_syst = select_path_sens_syst
    )
    selector <- selectors[[KEGG_path]]
    if (is.null(selector)) {
      stop("unknown KEGG_path '", KEGG_path, "'; valid values are: ",
           paste(c(names(selectors), "KEGG_path"), collapse = ", "),
           call. = FALSE)
    }
    # The helpers never read their argument, so none is supplied.
    selected <- proc_path(selector())
    pathway.codes <- selected[[1]]
    a <- selected[[2]]
  }

  if (length(pathway.codes) == 0L) {
    stop("no human KEGG pathway matched KEGG_path '", KEGG_path, "'",
         call. = FALSE)
  }

  # simplify = FALSE guarantees a list named by pathway code even when every
  # pathway happens to have the same number of genes.
  genes.by.pathway <- sapply(pathway.codes, function(pwid) {
    pw <- keggGet(pwid)
    # Keep every other element starting from the first; presumably these are
    # the gene identifiers and the skipped ones their descriptions -- confirm
    # against the KEGGREST entry format.
    pw[[1]]$GENE[c(TRUE, FALSE)]
  }, simplify = FALSE, USE.NAMES = TRUE)

  x <- org.Hs.egSYMBOL2EG
  xx <- as.list(x[mappedkeys(x)])

  # Flatten the symbol -> identifier map once so that each pathway needs a
  # single vectorised membership test instead of one intersect() per symbol.
  gene_ids <- unlist(xx, use.names = FALSE)
  symbol_index <- rep(seq_along(xx), lengths(xx))

  top3 <- matrix(" ", nrow = length(xx), ncol = length(genes.by.pathway),
                 dimnames = list(names(xx), names(genes.by.pathway)))
  for (k in seq_along(genes.by.pathway)) {
    members <- unique(symbol_index[gene_ids %in% genes.by.pathway[[k]]])
    top3[members, k] <- names(xx)[members]
  }

  # Replace pathway codes in the column names by the pathway titles.
  rownames(a) <- sub("path:", "", rownames(a))
  hit <- match(colnames(top3), rownames(a))
  colnames(top3)[!is.na(hit)] <- a[hit[!is.na(hit)], 1]
  top3
}
165
+
166
+
167
#' @title Get network data.
#' @description getNETdata creates a data frame with network data.
#' Network category can be filtered among: physical interactions, co-localization, genetic interactions and shared protein domain.
#' @param network  variable. The user can use the following parameters
#' based on the network types to be used. PHint for Physical_interactions,
#' COloc for Co-localization, GENint for Genetic_interactions and
#' SHpd for Shared_protein_domains
#' @param organism organism==NULL default value is homo sapiens (row 6 of the
#' species table returned by SpidermiRquery_species). Otherwise the name is
#' looked up in the first column of that table; when it is not found, row 9 is
#' used with a warning, which is what every non-NULL value selected before.
#' @export
#' @importFrom SpidermiR SpidermiRquery_species SpidermiRquery_spec_networks SpidermiRdownload_net SpidermiRprepare_NET
#' @return dataframe with gene-gene (or protein-protein interactions); the two
#' columns are named m_shar_pro and m2_shar_pro
#' @examples
#' organism="Saccharomyces_cerevisiae"
#' netw<-getNETdata(network="SHpd",organism)
getNETdata <- function(network, organism = NULL) {
  # NOTE(review): `species` is not defined in this function; the call only
  # works if SpidermiRquery_species never evaluates its argument or a global
  # `species` exists -- confirm against SpidermiR.
  org_shar_pro <- SpidermiRquery_species(species)

  if (is.null(organism)) {
    organism_row <- 6L
  } else {
    # Assumes the first column holds species names written like the
    # documented example "Saccharomyces_cerevisiae" -- TODO confirm.
    organism_row <- match(as.character(organism)[1L],
                          as.character(org_shar_pro[, 1]))
    if (is.na(organism_row)) {
      warning("organism '", as.character(organism)[1L],
              "' not found in the species table; using row 9 as before",
              call. = FALSE)
      organism_row <- 9L
    }
  }
  organism_id <- org_shar_pro[organism_row, ]

  net_shar_prot <- SpidermiRquery_spec_networks(organismID = organism_id, network)
  out_net_shar_pro <- SpidermiRdownload_net(net_shar_prot)
  geneSymb_net_shar_pro <- SpidermiRprepare_NET(organismID = organism_id,
                                                data = out_net_shar_pro)

  ds_shar_pro <- do.call("rbind", geneSymb_net_shar_pro)
  data_shar_pro <- as.data.frame(ds_shar_pro[!duplicated(ds_shar_pro), ])

  # as.character() keeps the symbols themselves even if the columns are
  # factors; cbind() on factors would store their integer codes.
  ss_shar_pro <- cbind(as.character(data_shar_pro$gene_symbolA),
                       as.character(data_shar_pro$gene_symbolB))

  # drop = FALSE keeps two columns when only one unique pair remains.
  data_pr_shar_pro <- as.data.frame(
    ss_shar_pro[!duplicated(ss_shar_pro), , drop = FALSE]
  )
  colnames(data_pr_shar_pro) <- c("m_shar_pro", "m2_shar_pro")
  data_pr_shar_pro
}
203
+
204
+
205
+
206
+
207
+
0 208
new file mode 100644
... ...
@@ -0,0 +1,541 @@
1
+
2
+
3
+
4
# Titles requested by getKEGGdata for KEGG_path = "Carb_met", each followed by
# the human species suffix. The `Carbohydrate` argument is never read.
select_path_carb <- function(Carbohydrate) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Glycolysis / Gluconeogenesis",
    "Citrate cycle (TCA cycle)",
    "Pentose phosphate pathway",
    "Pentose and glucuronate interconversions",
    "Fructose and mannose metabolism",
    "Galactose metabolism",
    "Ascorbate and aldarate metabolism",
    "Starch and sucrose metabolism",
    "Amino sugar and nucleotide sugar metabolism",
    "Pyruvate metabolism",
    "Glyoxylate and dicarboxylate metabolism",
    "Propanoate metabolism",
    "Butanoate metabolism",
    "C5-Branched dibasic acid metabolism",
    "Inositol phosphate metabolism",
    "Enzymes",
    "Compounds with biological roles"
  )
  paste(titles, human_suffix)
}
26
+
27
# Titles requested by getKEGGdata for KEGG_path = "Ener_met", each followed by
# the human species suffix. The `Energy` argument is never read.
select_path_en <- function(Energy) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Oxidative phosphorylation",
    "Photosynthesis",
    "Photosynthesis - antenna proteins",
    "Carbon fixation in photosynthetic organisms",
    "Carbon fixation pathways in prokaryotes",
    "Methane metabolism",
    "Nitrogen metabolism",
    "Sulfur metabolism"
  )
  paste(titles, human_suffix)
}
40
+  
41
+
42
# Titles requested by getKEGGdata for KEGG_path = "Lip_met", each followed by
# the human species suffix. The `Lipid` argument is never read.
select_path_lip <- function(Lipid) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Fatty acid biosynthesis",
    "Fatty acid elongation",
    "Fatty acid degradation",
    "Synthesis and degradation of ketone bodies",
    "Cutin, suberine and wax biosynthesis",
    "Steroid biosynthesis",
    "Primary bile acid biosynthesis",
    "Secondary bile acid biosynthesis",
    "Steroid hormone biosynthesis",
    "Glycerolipid metabolism",
    "Glycerophospholipid metabolism",
    "Ether lipid metabolism",
    "Sphingolipid metabolism",
    "Arachidonic acid metabolism",
    "Linoleic acid metabolism",
    "alpha-Linolenic acid metabolism",
    "Biosynthesis of unsaturated fatty acids"
  )
  paste(titles, human_suffix)
}
65
+
66
+
67
+
68
+
69
# Titles requested by getKEGGdata for KEGG_path = "Amn_met", each followed by
# the human species suffix. The `Aminoacid` argument is never read.
select_path_amn <- function(Aminoacid) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Alanine, aspartate and glutamate metabolism",
    "Glycine, serine and threonine metabolism",
    "Cysteine and methionine metabolism",
    "Valine, leucine and isoleucine degradation",
    "Valine, leucine and isoleucine biosynthesis",
    "Lysine biosynthesis",
    "Lysine degradation",
    "Arginine biosynthesis",
    "Arginine and proline metabolism",
    "Histidine metabolism",
    "Tyrosine metabolism",
    "Phenylalanine metabolism",
    "Tryptophan metabolism",
    "Phenylalanine, tyrosine and tryptophan biosynthesis",
    "beta-Alanine metabolism",
    "Taurine and hypotaurine metabolism",
    "Phosphonate and phosphinate metabolism",
    "Selenocompound metabolism",
    "Cyanoamino acid metabolism",
    "D-Glutamine and D-glutamate metabolism",
    "D-Arginine and D-ornithine metabolism",
    "D-Alanine metabolism",
    "Glutathione metabolism"
  )
  paste(titles, human_suffix)
}
98
+
99
# Titles requested by getKEGGdata for KEGG_path = "Gly_bio_met", each followed
# by the human species suffix. The `Glybio_met` argument is never read.
select_path_gly <- function(Glybio_met) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "N-Glycan biosynthesis",
    "Various types of N-glycan biosynthesis",
    "Mucin type O-Glycan biosynthesis",
    "Other types of O-glycan biosynthesis",
    "Glycosaminoglycan biosynthesis - CS/DS",
    "Glycosaminoglycan biosynthesis - HS/Hep",
    "Glycosaminoglycan biosynthesis - KS",
    "Glycosaminoglycan degradation",
    "Glycosylphosphatidylinositol(GPI)-anchor biosynthesis",
    "Glycosphingolipid biosynthesis - lacto and neolacto series",
    "Glycosphingolipid biosynthesis - globo series",
    "Glycosphingolipid biosynthesis - ganglio series",
    "Lipopolysaccharide biosynthesis",
    "Peptidoglycan biosynthesis",
    "Other glycan degradation"
  )
  paste(titles, human_suffix)
}
119
+
120
+
121
+
122
# Titles requested by getKEGGdata for KEGG_path = "Cof_vit_met", each followed
# by the human species suffix. The `Cofa_vita_met` argument is never read.
select_path_cofa <- function(Cofa_vita_met) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Thiamine metabolism",
    "Riboflavin metabolism",
    "Vitamin B6 metabolism",
    "Nicotinate and nicotinamide metabolism",
    "Pantothenate and CoA biosynthesis",
    "Biotin metabolism",
    "Lipoic acid metabolism",
    "Folate biosynthesis",
    "One carbon pool by folate",
    "Retinol metabolism",
    "Porphyrin and chlorophyll metabolism",
    "Ubiquinone and other terpenoid-quinone biosynthesis"
  )
  paste(titles, human_suffix)
}
139
+
140
# Titles requested by getKEGGdata for KEGG_path = "Transcript", each followed
# by the human species suffix. The `Transcription` argument is never read.
select_path_transc <- function(Transcription) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "RNA polymerase",
    "Basal transcription factors",
    "Spliceosome",
    "Transcription factors",
    "Transcription machinery"
  )
  paste(titles, human_suffix)
}
150
+
151
+
152
+
153
# Titles requested by getKEGGdata for KEGG_path = "Transl", each followed by
# the human species suffix. The `Translation` argument is never read.
select_path_transl <- function(Translation) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Ribosome",
    "Aminoacyl-tRNA biosynthesis",
    "RNA transport",
    "mRNA surveillance pathway",
    "Ribosome biogenesis in eukaryotes",
    "Ribosomal proteins",
    "Ribosome biogenesis",
    "Transfer RNA biogenesis",
    "Translation factors"
  )
  paste(titles, human_suffix)
}
167
+
168
# Titles requested by getKEGGdata for KEGG_path = "Fold_degr", each followed
# by the human species suffix. The argument is never read.
select_path_fold <- function(Folding_sorting_and_degradation) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Protein export",
    "Protein processing in endoplasmic reticulum",
    "SNARE interactions in vesicular transport",
    "Ubiquitin mediated proteolysis",
    "Sulfur relay system",
    "RNA degradation",
    "Chaperones and folding catalysts",
    "SNAREs",
    "Ubiquitin system",
    "Proteasome"
  )
  paste(titles, human_suffix)
}
183
+
184
+
185
+
186
+
187
# Titles requested by getKEGGdata for KEGG_path = "Repl_repair", each followed
# by the human species suffix. The argument is never read.
select_path_repl <- function(Replication_and_repair) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "DNA replication",
    "Base excision repair",
    "Nucleotide excision repair",
    "Mismatch repair",
    "Homologous recombination",
    "Non-homologous end-joining",
    "Fanconi anemia pathway",
    "DNA replication proteins",
    "Chromosome",
    # NOTE(review): the next two entries look like a single title
    # ("DNA repair and recombination proteins") split in two -- confirm.
    "DNA repair and recombination",
    "proteins"
  )
  paste(titles, human_suffix)
}
203
+
204
+
205
+
206
# Titles requested by getKEGGdata for KEGG_path = "sign_transd", each followed
# by the human species suffix. The argument is never read.
select_path_sign <- function(Signal_transduction) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Ras signaling pathway",
    "Rap1 signaling pathway",
    "MAPK signaling pathway",
    "ErbB signaling pathway",
    "Wnt signaling pathway",
    "Notch signaling pathway",
    "Hedgehog signaling pathway",
    "TGF-beta signaling pathway",
    "Hippo signaling pathway",
    "VEGF signaling pathway",
    "Jak-STAT signaling pathway",
    "NF-kappa B signaling pathway",
    "TNF signaling pathway",
    "HIF-1 signaling pathway",
    "FoxO signaling pathway",
    "Calcium signaling pathway",
    "Phosphatidylinositol signaling system",
    "Phospholipase D signaling pathway",
    "Sphingolipid signaling pathway",
    "cAMP signaling pathway",
    "cGMP-PKG signaling pathway",
    "PI3K-Akt signaling pathway",
    "AMPK signaling pathway",
    "mTOR signaling pathway"
  )
  paste(titles, human_suffix)
}
235
+
236
+
237
# Titles requested by getKEGGdata for KEGG_path = "sign_mol_int", each
# followed by the human species suffix. The argument is never read.
select_path_sign_mol <- function(Signaling_molecules_and_interaction) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Neuroactive ligand-receptor interaction",
    "Cytokine-cytokine receptor interaction",
    "ECM-receptor interaction",
    "Cell adhesion molecules (CAMs)"
  )
  paste(titles, human_suffix)
}
246
+
247
+
248
# Titles requested by getKEGGdata for KEGG_path = "Transp_cat", each followed
# by the human species suffix. The argument is never read.
select_path_transp_ca <- function(Transport_and_catabolism) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Endocytosis",
    "Phagosome",
    "Lysosome",
    "Peroxisome",
    "Regulation of autophagy"
  )
  paste(titles, human_suffix)
}
258
+
259
# Titles requested by getKEGGdata for KEGG_path = "cell_grow_d", each followed
# by the human species suffix. The argument is never read.
select_path_cell_grow <- function(Cell_growth_and_death) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Cell cycle",
    "Apoptosis",
    "p53 signaling pathway"
  )
  paste(titles, human_suffix)
}
267
+
268
+
269
# Titles requested by getKEGGdata for KEGG_path = "cell_comm", each followed
# by the human species suffix. The argument is never read.
select_path_cell_comm <- function(Cellular_community) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Focal adhesion",
    "Adherens junction",
    "Tight junction",
    "Gap junction",
    # No trailing space here: paste() already inserts one separator, and a
    # doubled space would stop this title from matching like the others.
    "Signaling pathways regulating pluripotency of stem cells"
  )
  paste(titles, human_suffix)
}
279
+
280
+
281
# Titles requested by getKEGGdata for KEGG_path = "imm_syst", each followed by
# the human species suffix. The `Immune_system` argument is never read.
select_path_imm_syst <- function(Immune_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Hematopoietic cell lineage",
    "Complement and coagulation cascades",
    "Platelet activation",
    "Toll-like receptor signaling pathway",
    "Toll and Imd signaling pathway",
    "NOD-like receptor signaling pathway",
    "RIG-I-like receptor signaling pathway",
    "Cytosolic DNA-sensing pathway",
    "Natural killer cell mediated cytotoxicity",
    "Antigen processing and presentation",
    "T cell receptor signaling pathway",
    "B cell receptor signaling pathway",
    "Fc epsilon RI signaling pathway",
    "Fc gamma R-mediated phagocytosis",
    "Leukocyte transendothelial migration",
    "Intestinal immune network for IgA production",
    "Chemokine signaling pathway"
  )
  paste(titles, human_suffix)
}
304
+
305
+
306
+
307
+
308
# Titles requested by getKEGGdata for KEGG_path = "end_syst", each followed by
# the human species suffix. The `Endocrine_system` argument is never read.
select_path_end_syst <- function(Endocrine_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Insulin secretion",
    "Insulin signaling pathway",
    "Glucagon signaling pathway",
    "Regulation of lipolysis in adipocytes",
    "Adipocytokine signaling pathway",
    "PPAR signaling pathway",
    "GnRH signaling pathway",
    "Ovarian steroidogenesis",
    "Estrogen signaling pathway",
    "Progesterone-mediated oocyte maturation",
    "Prolactin signaling pathway",
    "Oxytocin signaling pathway",
    "Thyroid hormone synthesis",
    "Thyroid hormone signaling pathway",
    "Melanogenesis",
    "Renin secretion",
    "Renin-angiotensin system",
    "Aldosterone synthesis and secretion"
  )
  paste(titles, human_suffix)
}
333
+
334
+
335
# Titles requested by getKEGGdata for KEGG_path = "circ_syst", each followed
# by the human species suffix. The argument is never read.
select_path_circ_syst <- function(Circulatory_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Cardiac muscle contraction",
    "Adrenergic signaling in cardiomyocytes",
    "Vascular smooth muscle contraction"
  )
  paste(titles, human_suffix)
}
343
+
344
+
345
# Titles requested by getKEGGdata for KEGG_path = "dig_syst", each followed by
# the human species suffix. The `Digestive_system` argument is never read.
select_path_dig_syst <- function(Digestive_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Salivary secretion",
    "Gastric acid secretion",
    "Pancreatic secretion",
    "Bile secretion",
    "Carbohydrate digestion and absorption",
    "Protein digestion and absorption",
    "Fat digestion and absorption",
    "Vitamin digestion and absorption",
    "Mineral absorption"
  )
  paste(titles, human_suffix)
}
360
+
361
+
362
+
363
# Titles requested by getKEGGdata for KEGG_path = "exc_syst", each followed by
# the human species suffix. The `Excretory_system` argument is never read.
select_path_exc_syst <- function(Excretory_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Vasopressin-regulated water reabsorption",
    "Aldosterone-regulated sodium reabsorption",
    "Endocrine and other factor-regulated calcium reabsorption",
    "Proximal tubule bicarbonate reclamation",
    "Collecting duct acid secretion"
  )
  paste(titles, human_suffix)
}
375
+
376
+
377
# Titles requested by getKEGGdata for KEGG_path = "nerv_syst", each followed
# by the human species suffix. The `Nervous_system` argument is never read.
select_path_ner_syst <- function(Nervous_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Glutamatergic synapse",
    "GABAergic synapse",
    "Cholinergic synapse",
    "Dopaminergic synapse",
    "Serotonergic synapse",
    "Long-term potentiation",
    "Long-term depression",
    "Retrograde endocannabinoid signaling",
    "Synaptic vesicle cycle",
    "Neurotrophin signaling pathway"
  )
  paste(titles, human_suffix)
}
393
+
394
+
395
# Titles requested by getKEGGdata for KEGG_path = "sens_syst", each followed
# by the human species suffix. The `Sensory_system` argument is never read.
select_path_sens_syst <- function(Sensory_system) {
  human_suffix <- "- Homo sapiens (human)"
  titles <- c(
    "Phototransduction",
    "Olfactory transduction",
    "Taste transduction",
    "Inflammatory mediator regulation of TRP channels"
  )
  paste(titles, human_suffix)
}
404
+
405
+
406
+
407
#' @title Select the class of TCGA data
#' @description keeps the columns of a gene expression matrix whose names
#' carry the requested sample-type code at characters 14-15: code 01 for
#' "tumor", any code >= 10 for "normal". Any other value of typesample returns
#' the data unchanged.
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal)
#' @export
#' @return a gene expression matrix of the samples with specified label
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_filt,typesample="tumor")[,2]
SelectedSample <- function(Dataset, typesample) {
  if (typesample == "tumor") {
    sample_code <- as.numeric(substr(colnames(Dataset), 14, 15))
    # drop = FALSE keeps a matrix even when a single sample matches.
    Dataset <- Dataset[, which(sample_code == 1), drop = FALSE]
  } else if (typesample == "normal") {
    sample_code <- as.numeric(substr(colnames(Dataset), 14, 15))
    Dataset <- Dataset[, which(sample_code >= 10), drop = FALSE]
  }
  Dataset
}
427
+
428
+
429
#' @title Select the entries whose AUC exceeds a cut-off
#' @description binds the AUC values into one column, sorts them in
#' decreasing order and keeps the leading rows whose value is above the cut-off
#' @param cutoff cut-off for AUC value
#' @param auc.df list of AUC value
#' @return a data frame with one row per retained entry (row names come from
#' the names of auc.df); the first column is the logical result of
#' AUC > cutoff and the second column is the AUC value. It has zero rows when
#' no value exceeds the cut-off.
select_class <- function(auc.df, cutoff) {
  ds <- do.call("rbind", auc.df)
  # drop = FALSE keeps the row names even when auc.df has a single entry.
  tmp_ordered <- as.data.frame(ds[order(ds, decreasing = TRUE), , drop = FALSE])
  colnames(tmp_ordered) <- "pathway"
  er <- as.data.frame(tmp_ordered$pathway > cutoff)
  ase <- tmp_ordered[tmp_ordered$pathway > cutoff, ]
  rownames(er) <- rownames(tmp_ordered)
  er[, 2] <- tmp_ordered$pathway
  # seq_len() yields no rows when nothing passes; 1:0 would return row 1.
  er[seq_len(length(ase)), ]
}
445
+
446
+
447
+
448
+
449
#' @title Process matrix TCGA data after the selection of pairwise pathway
#' @description converts the score columns (third column onwards) to numeric,
#' names each row after its cleaned pathway pair and keeps only the rows that
#' are also row names of list_perf
#' @param measure matrix with measure of cross-talk among pathways; the first
#' two columns hold the pathway names and the remaining columns the scores
#' @param list_perf output of the function select_class
#' @return a gene expression matrix for case study 1 (a data frame of numeric
#' scores with one row per retained pathway pair)
process_matrix <- function(measure, list_perf) {
  # drop = FALSE keeps a column structure when there is one score column.
  scoreMatrix <- as.data.frame(measure[, 3:ncol(measure), drop = FALSE])
  scoreMatrix[] <- lapply(scoreMatrix, function(column) {
    as.numeric(as.character(column))
  })

  # Strip the species suffix, then every space, underscore and hyphen.
  clean_name <- function(pathway_name) {
    cleaned <- gsub(" ", "_", pathway_name)
    cleaned <- sub("_-_Homo_sapiens_*", "", cleaned)
    cleaned <- gsub("(human)", "", cleaned, fixed = TRUE)
    gsub("[_-]", "", cleaned)
  }

  rownames(scoreMatrix) <- paste(clean_name(measure[, 1]),
                                 clean_name(measure[, 2]), sep = "_")
  intera <- intersect(rownames(scoreMatrix), rownames(list_perf))
  scoreMatrix[intera, , drop = FALSE]
}
475
+
476
+
477
+
478
# Converts the score columns (third column onwards) of `measure_cell_process`
# to numeric and names each row after its cleaned pathway pair, built from the
# pathway names in the first two columns. Returns the resulting data frame.
process_matrix_cell_process <- function(measure_cell_process) {
  # drop = FALSE keeps a column structure when there is one score column.
  score__cell_grow_d <- as.data.frame(
    measure_cell_process[, 3:ncol(measure_cell_process), drop = FALSE]
  )
  score__cell_grow_d[] <- lapply(score__cell_grow_d, function(column) {
    as.numeric(as.character(column))
  })

  # Strip the species suffix, then every space, underscore and hyphen.
  clean_name <- function(pathway_name) {
    cleaned <- gsub(" ", "_", pathway_name)
    cleaned <- sub("_-_Homo_sapiens_*", "", cleaned)
    cleaned <- gsub("(human)", "", cleaned, fixed = TRUE)
    gsub("[_-]", "", cleaned)
  }

  rownames(score__cell_grow_d) <- paste(clean_name(measure_cell_process[, 1]),
                                        clean_name(measure_cell_process[, 2]),
                                        sep = "_")
  score__cell_grow_d
}
501
+
502
+
503
#' @title Select human KEGG pathways by title.
#' @description proc_path downloads the list of human KEGG pathways and keeps
#' the ones whose title appears in mer.
#' @param mer  output for example of select_path_carb
#' @export
#' @importFrom KEGGREST keggList
#' @return a list of two elements: the codes of the selected pathways (with any
#' "path:" prefix removed) and a one-column matrix of their titles with the
#' original codes as row names (NULL when no title matches)
proc_path <- function(mer) {
  pathways.list <- keggList("pathway", "hsa") ## returns the list of human pathways
  # Vectorised membership test; order and names of the KEGG list are kept.
  selected <- as.list(pathways.list[pathways.list %in% mer])
  pathway.codes <- sub("path:", "", names(selected))
  b <- do.call("rbind", selected)
  list(pathway.codes, b)
}
525
+
526
+
527
+
528
# Delete NULL/empty entries from a list: an entry is kept only when it has at
# least one row. NROW() is used instead of nrow() so that NULL entries count as
# empty rather than raising an error.
delete.NULLs <- function(xlist) {
  has_rows <- vapply(xlist, function(entry) NROW(entry) != 0, logical(1))
  xlist[has_rows]
}
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
0 542
new file mode 100644
... ...
@@ -0,0 +1,570 @@
1
#' @title Get human KEGG pathway data and network data in order to define the common gene.
#' @description path_net creates a list of network data for each human pathway.
#'   For every pathway column, the network is restricted to the genes that
#'   occur both in the network and in that pathway, and the edges among those
#'   genes are returned.
#' @param data  network data as provided by getNETdata (two columns, one edge
#'   per row)
#' @param pathway  pathway data as provided by getKEGGdata (one column per
#'   pathway)
#' @importFrom igraph graph.data.frame induced.subgraph get.data.frame
#' @export
#' @return a list of network data for each pathway (interacting genes belong to
#'   that pathway); each element is a data frame whose first two columns are
#'   named V1 and V2
#' @examples
#' lista_net<-path_net(pathway=path,data=netw)
path_net <- function(pathway, data) {
  # Everything below depends only on `data`, so it is prepared once instead of
  # being rebuilt for every pathway.
  colnames(data) <- c("gene_symbolA", "gene_symbolB")
  is_fct <- vapply(data, is.factor, logical(1))
  data[is_fct] <- lapply(data[is_fct], as.character)
  network_genes <- unique(unlist(data))
  g <- graph.data.frame(data, directed = FALSE)

  lista_int <- list()
  # seq_len() so that a pathway table without columns yields an empty list.
  for (k in seq_len(ncol(pathway))) {
    print(colnames(pathway)[k])
    # Genes of the current pathway that are present in the network.
    shared <- intersect(network_genes, pathway[, k])
    g2 <- induced.subgraph(graph = g, vids = shared)
    aaa <- get.data.frame(g2)
    colnames(aaa)[1] <- "V1"
    colnames(aaa)[2] <- "V2"
    lista_int[[k]] <- aaa
    names(lista_int)[k] <- colnames(pathway)[k]
  }
  lista_int
}
31
+
32
+
33
+
34
+
35
#' @title Get human KEGG pathway data and output of path_net in order to define the common genes.
#' @description list_path_net creates a list of interacting genes for each human
#'   pathway. The entry for a pathway is filled only when every gene found in
#'   its network is also listed in the matching column of \code{pathway}.
#' @param lista_net  output of path_net (one two-column edge table per pathway)
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a list of genes for each pathway (interacting genes belong to that
#'   pathway); a pathway whose network has no edges gets a zero-length entry
#' @examples
#' lista_netw<-path_net(pathway=path,data=netw)
#' list_path<-list_path_net(lista_net=lista_netw,pathway=path)
list_path_net <- function(lista_net, pathway) {
  v <- list()
  # seq_along() so that an empty network list returns an empty result instead
  # of failing on lista_net[[1]].
  for (j in seq_along(lista_net)) {
    cf <- lista_net[[j]]
    is_fct <- vapply(cf, is.factor, logical(1))
    cf[is_fct] <- lapply(cf[is_fct], as.character)
    colnames(cf) <- c("m_shar_pro", "m2_shar_pro")
    # Distinct genes appearing at either end of an edge.
    genes <- unique(c(cf$m_shar_pro, cf$m2_shar_pro))
    common <- intersect(genes, pathway[, j])
    # Keep the pathway only if all of its network genes belong to it.
    if (length(common) == length(genes)) {
      print(paste("List of genes interacting in the same pathway:", colnames(pathway)[j]))
      v[[j]] <- common
      names(v)[j] <- colnames(pathway)[j]
    }
  }
  v
}
73
+
74
+
75
+
76
+
77
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input .
#' @description GE_matrix creates, for each pathway given by the user, the
#'   subset of the gene expression matrix restricted to the genes of that
#'   pathway. Pathways sharing no gene with the matrix are skipped.
#' @param DataMatrix  gene expression matrix (eg.TCGA data), genes as row names
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a list with one data frame per pathway (gene expression levels of
#'   the genes belonging to that pathway), named after the pathway
#' @examples
#' list_path_gene<-GE_matrix(DataMatrix=tumo[,1:2],pathway=path)
GE_matrix <- function(DataMatrix, pathway) {
  # Pathway labels: first space -> "_", then the literal KEGG suffix is removed.
  # NOTE(review): when the first space of a name is the one in front of
  # "- Homo sapiens (human)", the suffix no longer matches and is kept.
  path_name <- sub(" ", "_", colnames(pathway))
  d_pr <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)
  colnames(pathway) <- d_pr
  zz <- as.data.frame(DataMatrix)
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common <- intersect(rownames(zz), pathway[, k])
    if (length(common) != 0) {
      # Report the pathway being processed (taken from the `pathway` argument,
      # not from a global object).
      print(colnames(pathway)[k])
      # drop = FALSE keeps the sample column name when there is one sample.
      v[[k]] <- zz[common, , drop = FALSE]
      names(v)[k] <- colnames(pathway)[k]
    }
  }
  v
}
112
+
113
+
114
+
115
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the mean gene expression for only pathways given in input .
#' @description matrix_plot creates a genes x pathways matrix holding, for each
#'   gene of a pathway, the mean expression of that gene across the samples of
#'   \code{DataMatrix}; all other cells are 0. Rows whose sum is not positive
#'   are removed.
#' @param DataMatrix  gene expression matrix (eg.TCGA data), genes as row names
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a matrix for each pathway (mean gene expression level belong to that pathway)
#' @examples
#' list_path_plot<-matrix_plot(DataMatrix=tumo[,1:2],pathway=path)
matrix_plot <- function(DataMatrix, pathway) {
  # Pathway labels: first space -> "_", then the literal KEGG suffix is removed.
  path_name <- sub(" ", "_", colnames(pathway))
  d_pr <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)
  colnames(pathway) <- d_pr
  # One mean expression value per gene.
  zz <- as.data.frame(rowMeans(DataMatrix))
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common <- intersect(rownames(zz), pathway[, k])
    if (length(common) != 0) {
      # Report the pathway being processed (taken from the `pathway` argument,
      # not from a global object).
      print(colnames(pathway)[k])
      v[[k]] <- common
      names(v)[k] <- colnames(pathway)[k]
    }
  }
  PEAmatrix <- matrix(0, nrow(DataMatrix), ncol(pathway))
  rownames(PEAmatrix) <- rownames(DataMatrix)
  colnames(PEAmatrix) <- colnames(pathway)
  # seq_along() so that "no pathway overlaps the data" no longer fails on v[[1]].
  for (i in seq_along(v)) {
    PEAmatrix[v[[i]], i] <- zz[v[[i]], ]
  }
  # Keep only genes with a positive total across pathways.
  PEAmatrix <- PEAmatrix[which(rowSums(PEAmatrix) > 0), ]
  PEAmatrix
}
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
#' @title Get human KEGG pathway data and a gene expression matrix we obtain a matrix with the gene expression for only pathways given in input .
#' @description plotting_cross_talk computes the gene-gene correlation matrix
#'   of \code{path_matrix} and, for each pathway, the row positions in that
#'   correlation matrix of the pathway genes found in \code{DataMatrix}.
#' @param DataMatrix  gene expression matrix (eg.TCGA data), genes as row names
#' @param pathway  pathway data as provided by getKEGGdata
#' @param path_matrix  output of the function matrix_plot
#' @export
#' @return a list with \code{corr}, the correlation matrix, and \code{gruppi},
#'   a list of row indices of \code{corr} for each pathway (the groups used to
#'   plot the pathway cross talk)
#' @examples
#' mt<-plotting_cross_talk(DataMatrix=tumo[,1:2],pathway=path,path_matrix=list_path_plot)
plotting_cross_talk <- function(DataMatrix, pathway, path_matrix) {
  # Normalise the pathway labels once. Doing this inside the loop turned one
  # more space into "_" on every iteration, so labels depended on the number
  # of pathways and differed from the ones produced by matrix_plot.
  path_name <- sub(" ", "_", colnames(pathway))
  d_pr <- gsub(" - Homo sapiens (human)", "", path_name, fixed = TRUE)
  colnames(pathway) <- d_pr

  zz <- as.data.frame(rowMeans(DataMatrix))
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common <- intersect(rownames(zz), pathway[, k])
    if (length(common) != 0) {
      # Report the pathway being processed (taken from the `pathway` argument,
      # not from a global object).
      print(colnames(pathway)[k])
      v[[k]] <- common
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  # Correlation between the rows of path_matrix.
  mi <- t(path_matrix)
  dc <- cor(mi)

  vv <- list()
  # seq_along() so that an empty gene list does not fail on v[[1]].
  for (k in seq_along(v)) {
    in_matrix <- intersect(rownames(dc), v[[k]])
    vv[[k]] <- match(in_matrix, rownames(dc))
    names(vv)[k] <- colnames(pathway)[k]
  }
  list(corr = dc, gruppi = vv)
}
201
+
202
+
203
+
204
+
205
#' @title For TCGA data get human pathway data and creates a matrix with the average of genes for each pathway.
#' @description average creates a matrix with a summarized value for each
#'   pathway: for every sample (column of \code{dataFilt}) the mean expression
#'   of the pathway genes that are present among the row names of
#'   \code{dataFilt}.
#' @param dataFilt TCGA matrix (genes as row names, samples as columns)
#' @param pathway pathway data (one column of genes per pathway)
#' @export
#' @return a pathways x samples matrix of mean values; a pathway sharing no
#'   gene with \code{dataFilt} gets NaN
#' @examples
#' score_mean<-average(dataFilt=tumo[,1:2],path)
average <- function(dataFilt, pathway) {
  PEAmatrix <- matrix(0, ncol(pathway), ncol(dataFilt))
  rownames(PEAmatrix) <- colnames(pathway)
  colnames(PEAmatrix) <- colnames(dataFilt)
  for (k in seq_len(nrow(PEAmatrix))) {
    common <- intersect(rownames(dataFilt), pathway[, k])
    # drop = FALSE: a pathway with one matching gene (or data with a single
    # sample) must stay two-dimensional, otherwise colSums() fails.
    common_matrix <- dataFilt[common, , drop = FALSE]
    PEAmatrix[k, ] <- colSums(common_matrix) / length(common)
  }
  PEAmatrix
}
236
+
237
+
238
+
239
+  
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
#' @title For TCGA data get human pathway data and creates a measure of cross-talk among pathways
#' @description euc_dist_crtlk summarises every pathway with \code{average}
#'   and then, sample by sample, computes the distance (\code{dist}) between
#'   the summarised values of each pair of pathways.
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names of the pair, the remaining columns the distance
#'   for each sample
#' @examples
#' score_euc_dista<-euc_dist_crtlk(dataFilt=tumo[,1:2],path)
euc_dist_crtlk <- function(dataFilt, pathway) {
  pathway_means <- average(dataFilt, pathway)
  # Every unordered pair of pathways, one pair per row.
  pathway_pairs <- t(combn(rownames(pathway_means), 2))
  # Distances: one row per pair, one column per sample (patient).
  pair_dist <- matrix(0, nrow(pathway_pairs), ncol(pathway_means))
  colnames(pair_dist) <- colnames(pathway_means)
  for (sample_idx in 1:ncol(pathway_means)) {
    # dist() lists the pairs in the same order as combn() enumerates them.
    pair_dist[, sample_idx] <- dist(pathway_means[, sample_idx])
  }
  # Prepend the pair labels to the distances.
  cbind(pathway_pairs, pair_dist)
}
272
+
273
+
274
+
275
+
276
#' @title For TCGA data get human pathway data and creates a measure of standard deviations among pathways
#' @description st_dv creates a matrix with, for every pathway and sample, the
#'   standard deviation of the expression of the pathway genes that are present
#'   among the row names of \code{DataMatrix}.
#' @param DataMatrix TCGA matrix (genes as row names, samples as columns)
#' @param pathway pathway data (one column of genes per pathway)
#' @export
#' @return a pathways x samples matrix of standard deviations; NA where fewer
#'   than two pathway genes are present in \code{DataMatrix}
#' @examples
#' stand_dev<-st_dv(DataMatrix=tumo[,1:2],pathway=path)
st_dv <- function(DataMatrix, pathway) {
  PEAmatrix_sd <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix_sd) <- colnames(pathway)
  colnames(PEAmatrix_sd) <- colnames(DataMatrix)
  for (k in seq_len(nrow(PEAmatrix_sd))) {
    print(colnames(pathway)[k])
    common <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE: a pathway with one matching gene (or data with a single
    # sample) must stay two-dimensional, otherwise apply() fails.
    common_matrix <- DataMatrix[common, , drop = FALSE]
    # Standard deviation of the pathway genes, per sample.
    PEAmatrix_sd[k, ] <- apply(common_matrix, 2, sd)
  }
  PEAmatrix_sd
}
306
+
307
+
308
+
309
+
310
+
311
+
312
#' @title For TCGA data get human pathway data and creates a measure of discriminating score among pathways
#' @description ds_score_crtlk creates a matrix with a discriminating score for
#'   each pair of pathways and each sample: the distance between the two
#'   pathway values returned by \code{average}, divided by the sum of the two
#'   pathway standard deviations returned by \code{st_dv}.
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the pathway names of the pair, the remaining columns the score for
#'   each sample
#' @examples
#' cross_talk_st_dv<-ds_score_crtlk(dataFilt=tumo[,1:2],pathway=path)
ds_score_crtlk <- function(dataFilt, pathway) {
  # Numerator: per-sample distance between the summarised pathway values.
  pathway_means <- average(dataFilt, pathway)
  mean_pairs <- t(combn(rownames(pathway_means), 2))
  mean_gap <- matrix(0, nrow(mean_pairs), ncol(pathway_means))
  colnames(mean_gap) <- colnames(pathway_means)
  for (p in 1:ncol(pathway_means)) {
    mean_gap[, p] <- dist(pathway_means[, p])
  }

  # Denominator: per-sample sum of the two standard deviations of each pair.
  pathway_sd <- st_dv(dataFilt, pathway)
  sd_pairs <- t(combn(rownames(pathway_sd), 2))
  sd_total <- matrix(0, nrow(sd_pairs), ncol(pathway_sd))
  colnames(sd_total) <- colnames(pathway_sd)
  for (p in 1:ncol(pathway_sd)) {
    sample_sd <- pathway_sd[, p]
    sd_total[, p] <- apply(sd_pairs, 1, function(pair) sum(sample_sd[pair]))
  }

  # Discriminating score |M1 - M2| / (S1 + S2), labelled with the pair names.
  cbind(sd_pairs, mean_gap / sd_total)
}
347
+
348
+
349
+
350
+#' @title SVM classification for each feature
351
+#' @description svm class creates a list with auc value  
352
+#' @param TCGA_matrix gene expression matrix
353
+#' @param nfs nfs split data into a training  and test set
354
+#' @param tumour barcode samples for a class
355
+#' @param normal barcode samples for another class
356
+#' @export
357
+#' @importFrom e1071 tune svm 
358
+#' @importFrom ROCR prediction performance 
359
+#' @importFrom  grDevices rainbow
360
+#' @return a list with AUC value for pairwise pathway 
361
+#' @examples
362
+#' nf <- 60
363
+#' res_class<-svm_classification(TCGA_matrix=score_euc_dist,nfs=nf,
364
+#' normal=colnames(norm[,1:10]),tumour=colnames(tumo[,1:10]))
365
+svm_classification<-function(TCGA_matrix,tumour,normal,nfs){
366
+  #library("e1071")
367
+  #library(ROCR)
368
+
369
+  scoreMatrix <- as.data.frame(TCGA_matrix[,3:ncol(TCGA_matrix)])
370
+  scoreMatrix <-as.data.frame(scoreMatrix)
371
+  for( i in 1: ncol(scoreMatrix)){
372
+    scoreMatrix[,i] <- as.numeric(as.character(scoreMatrix[,i]))
373
+  }
374
+
375
+  TCGA_matrix[,1] <- gsub(" ", "_", TCGA_matrix[,1])
376
+  d<-sub('_-_Homo_sapiens_*', '', TCGA_matrix[,1])
377
+  #d_pr<-sub(')*', '', DataMatrix[,1])
378
+  
379
+  d_pr<- gsub("(human)", "", d, fixed="TRUE")
380
+  d_pr <- gsub("_", "", d_pr)
381
+  d_pr <- gsub("-", "", d_pr)
382
+  
383
+  TCGA_matrix[,2] <- gsub(" ", "_", TCGA_matrix[,2])
384
+  d2<-sub('_-_Homo_sapiens_(human)*', '', TCGA_matrix[,2])
385
+  d_pr2<- gsub("(human)", "", d2, fixed="TRUE")
386
+  d_pr2 <- gsub("_", "", d_pr2)
387
+  d_pr2 <- gsub("-", "", d_pr2)
388
+  
389
+  PathwaysPair <- paste( as.matrix(d_pr), as.matrix(d_pr2),sep="_" )
390
+  
391
+  rownames(scoreMatrix) <-PathwaysPair
392
+
393
+  
394
+  tDataMatrix<-as.data.frame(t(scoreMatrix))
395
+  #tDataMatrix$Target[,1]<-0
396
+  
397
+  tDataMatrix<-cbind(Target=0,tDataMatrix )
398
+
399
+  tum<-intersect(rownames(tDataMatrix),tumour)
400
+  nor<-intersect(rownames(tDataMatrix),normal)
401
+  #tDataMatrix$
402
+    
403
+  Dataset_g1<-tDataMatrix[nor,]
404
+  Dataset_g3<- tDataMatrix[tum,]
405
+    
406
+  
407
+#training=read.table('C:/Users/UserInLab05/Desktop/trai.txt',header = TRUE)
408
+#testset=read.table('C:/Users/UserInLab05/Desktop/test.txt',header = TRUE)
409
+
410
+  Dataset_g1$Target <- 0
411
+  Dataset_g3$Target<-1
412
+#Dataset_g3 <- Dataset_g3[Dataset_g3$Target <- 1, ]
413
+  
414
+tab_g1_training <- sample(rownames(Dataset_g1),round(nrow(Dataset_g1) / 100 * nfs ))
415
+tab_g3_training <- sample(rownames(Dataset_g3),round(nrow(Dataset_g3) / 100 * nfs ))
416
+tab_g1_testing <- setdiff(rownames(Dataset_g1),tab_g1_training)
417
+tab_g3_testing <- setdiff(rownames(Dataset_g3),tab_g3_training)
418
+
419
+FR<-intersect(rownames(Dataset_g1),tab_g1_training)
420
+
421
+#rownames(Dataset_g1)<-Dataset_g1[,1]
422
+G1<-Dataset_g1[FR,]
423
+
424
+FR1<-intersect(rownames(Dataset_g3),tab_g3_training)
425
+#rownames(Dataset_g3)<-Dataset_g3$ID
426
+
427
+G3<-Dataset_g3[FR1,]
428
+training<-rbind(G1,G3)
429
+
430
+inter1<-intersect(rownames(Dataset_g1),tab_g1_testing)
431
+#rownames(Dataset_g1)<-Dataset_g1$ID
432
+
433
+G1_testing<-Dataset_g1[inter1,]
434
+
435
+inter2<-intersect(rownames(Dataset_g3),tab_g3_testing)
436
+#rownames(Dataset_g3)<-Dataset_g3$ID
437
+G3_testing<-Dataset_g3[inter2,]
438
+
439
+testing<-rbind(G1_testing,G3_testing)
440
+
441
+x <- subset(training, select=-Target)
442
+y <- training$Target
443
+#testing[,2]<-NULL
444
+z<-subset(testing, select=-Target)
445
+
446
+zi<-testing$Target
447
+
448
+auc.df<-list()
449
+svm_model_after_tune_COMPL<-list()
450
+for( k in 2: ncol(training)){
451
+  print(colnames(training)[k])
452
+  
453
+
454
+  
455
+  svm_tune <- tune(svm, train.x=x, train.y=y, 
456
+                   kernel="radial", ranges=list(cost=10^(-1:2), gamma=c(.5,1,2)),cross=10)
457
+  #print(svm_tune)
458
+  
459
+  svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters$cost, gamma=svm_tune$best.parameters$gamma,cross=10,probability = TRUE)
460
+  
461
+  
462
+  #svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters[1], gamma=svm_tune$best.parameters[2],cross=10,probability = TRUE)
463
+  #summary(svm_model_after_tune)
464
+
465
+  j=k-1
466
+  z2=z[,j]
467
+  z3<-as.data.frame(z2)
468
+  #rownames(z3)<-rownames(z)
469
+  #colnames(z3)<-as.character(paste("X",j,sep = ""))
470
+  colnames(z3)<-colnames(z)[j]
471
+  #classifiersMatrix <- c(classifiersMatrix,svm_model_after_tune)
472
+  pred <- predict(svm_model_after_tune,z3,decision.values=TRUE,cross=10)
473
+
474
+  #a<-table(pred,zi)
475
+  svm.roc <- prediction(attributes(pred)$decision.values, zi)
476
+  svm.auc <- performance(svm.roc, 'tpr', 'fpr')