git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/branches/RELEASE_3_4/madman/Rpacks/StarBioTrek@125026 bc3139a8-67e5-0310-9ffc-ced21a209358

Claudia Cava authored on 12/12/2016 13:01:33
Showing 42 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+Package: StarBioTrek
2
+Type: Package
3
+Title: StarBioTrek
4
+Version: 1.0.1
5
+Date: 2016-12-12
6
+Author: Claudia Cava,
7
+    Isabella Castiglioni
8
+Maintainer: Claudia Cava <claudia.cava@ibfm.cnr.it>
9
+Depends:
10
+    R (>= 3.3)
11
+Imports:
12
+    SpidermiR,
13
+	KEGGREST,
14
+	org.Hs.eg.db,
15
+	AnnotationDbi,
16
+	e1071,
17
+	ROCR,
18
+	grDevices
19
+Description: StarBioTrek provides methods to measure pathway activity and cross-talk among pathways, also integrating information from network data.
20
+License: GPL (>= 3)
21
+biocViews: GeneRegulation,
22
+    Network,
23
+	Pathways,
24
+	KEGG
25
+Suggests:
26
+    BiocStyle,
27
+    knitr,
28
+    rmarkdown,
29
+    testthat,
30
+	devtools,
31
+	roxygen2,
32
+	qgraph,
33
+	png,
34
+	grid
35
+VignetteBuilder: knitr
36
+LazyData: true
37
+URL: https://github.com/claudiacava/StarBioTrek
38
+BugReports: https://github.com/claudiacava/StarBioTrek/issues
39
+RoxygenNote: 5.0.1
0 40
\ No newline at end of file
1 41
new file mode 100644
... ...
@@ -0,0 +1,30 @@
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(GE_matrix)
4
+export(SelectedSample)
5
+export(average)
6
+export(ds_score_crtlk)
7
+export(euc_dist_crtlk)
8
+export(getKEGGdata)
9
+export(getNETdata)
10
+export(list_path_net)
11
+export(matrix_plot)
12
+export(path_net)
13
+export(plotting_cross_talk)
14
+export(proc_path)
15
+export(st_dv)
16
+export(svm_classification)
17
+importFrom(AnnotationDbi,as.list)
18
+importFrom(AnnotationDbi,mappedkeys)
19
+importFrom(KEGGREST,keggGet)
20
+importFrom(KEGGREST,keggList)
21
+importFrom(ROCR,performance)
22
+importFrom(ROCR,prediction)
23
+importFrom(SpidermiR,SpidermiRdownload_net)
24
+importFrom(SpidermiR,SpidermiRprepare_NET)
25
+importFrom(SpidermiR,SpidermiRquery_spec_networks)
26
+importFrom(SpidermiR,SpidermiRquery_species)
27
+importFrom(e1071,svm)
28
+importFrom(e1071,tune)
29
+importFrom(grDevices,rainbow)
30
+importFrom(org.Hs.eg.db,org.Hs.egSYMBOL2EG)
0 31
new file mode 100644
... ...
@@ -0,0 +1,6 @@
1
+  StarBioTrek 
2
+----------------------------------------------------------------
3
+  FIRST VERSION - FEATURES
4
+
5
+* getKEGGdata	Searching by KEGG data.
6
+* getNETdata	Searching by network data.
0 7
new file mode 100644
... ...
@@ -0,0 +1,64 @@
1
+#' Download data
2
+#'
3
+#' StarBioTrek allows you to download the data of samples from StarBioTrek.
4
+#'
5
+#' The function you're likely to need from \pkg{StarBioTrek} is
6
+#' \code{path_star}
7
+#' Otherwise refer to the vignettes to see
8
+#' how to format the documentation.
9
+#'
10
+#' @docType package
11
+#' @name StarBioTrek
12
+NULL
13
+
14
+#' Pathway data from KEGG
15
+#' @docType data
16
+#' @keywords internal
17
+#' @name path
18
+#' @format A data frame with rows and  variables
19
+NULL
20
+
21
+#' network data
22
+#' @docType data
23
+#' @keywords internal
24
+#' @name netw
25
+#' @format A data frame with  rows and variables
26
+NULL
27
+
28
+
29
+
30
+
31
+#' TCGA data
32
+#' @docType data
33
+#' @keywords internal
34
+#' @name Data_CANCER_normUQ_filt
35
+#' @format A data frame with rows and variables
36
+NULL
37
+
38
+#' Score Matrix of pairwise pathway using euclidean distance
39
+#' @docType data
40
+#' @keywords internal
41
+#' @name score_euc_dist
42
+#' @format A data frame with rows and variables
43
+NULL
44
+
45
+#' TCGA data with normal samples
46
+#' @docType data
47
+#' @keywords internal
48
+#' @name norm
49
+#' @format A data frame with rows and variables
50
+NULL
51
+
52
+#' TCGA data with tumour samples
53
+#' @docType data
54
+#' @keywords internal
55
+#' @name tumo
56
+#' @format A data frame with rows and variables
57
+NULL
58
+
59
+#' A matrix of gene expression for pathways given by the user. 
60
+#' @docType data
61
+#' @keywords internal
62
+#' @name list_path_plot
63
+#' @format A data frame with rows and variables
64
+NULL
0 65
\ No newline at end of file
1 66
new file mode 100644
... ...
@@ -0,0 +1,207 @@
1
#' @title Get human KEGG pathway data.
#' @description getKEGGdata builds a gene-by-pathway membership matrix for
#' human KEGG pathways. Rows are the gene symbols mapped in
#' \code{org.Hs.egSYMBOL2EG}, columns are the selected pathways. A cell holds
#' the gene symbol when one of the identifiers mapped to that symbol appears
#' in the pathway's \code{GENE} entry, and \code{" "} otherwise.
#' @param KEGG_path a single character string selecting the pathways:
#' \code{"KEGG_path"} for every human pathway returned by
#' \code{keggList("pathway", "hsa")}, or one of the category keys
#' \code{"Carb_met"}, \code{"Ener_met"}, \code{"Lip_met"}, \code{"Amn_met"},
#' \code{"Gly_bio_met"}, \code{"Cof_vit_met"}, \code{"Transcript"},
#' \code{"Transl"}, \code{"Fold_degr"}, \code{"Repl_repair"},
#' \code{"sign_transd"}, \code{"sign_mol_int"}, \code{"Transp_cat"},
#' \code{"cell_grow_d"}, \code{"cell_comm"}, \code{"imm_syst"},
#' \code{"end_syst"}, \code{"circ_syst"}, \code{"dig_syst"},
#' \code{"exc_syst"}, \code{"nerv_syst"}, \code{"sens_syst"}.
#' @export
#' @importFrom KEGGREST keggList keggGet
#' @importFrom org.Hs.eg.db org.Hs.egSYMBOL2EG
#' @importFrom AnnotationDbi mappedkeys as.list
#' @return a character matrix (gene symbols x pathways) with human pathway data
#' @examples
#' path<-getKEGGdata(KEGG_path="Transcript")
getKEGGdata <- function(KEGG_path) {
  if (!is.character(KEGG_path) || length(KEGG_path) != 1L || is.na(KEGG_path)) {
    stop("'KEGG_path' must be a single character string", call. = FALSE)
  }

  # Category key -> function returning the KEGG pathway titles of the category.
  selectors <- list(
    Carb_met = select_path_carb,
    Ener_met = select_path_en,
    Lip_met = select_path_lip,
    Amn_met = select_path_amn,
    Gly_bio_met = select_path_gly,
    # Previously dispatched to select_path_gly by mistake.
    Cof_vit_met = select_path_cofa,
    Transcript = select_path_transc,
    Transl = select_path_transl,
    Fold_degr = select_path_fold,
    Repl_repair = select_path_repl,
    sign_transd = select_path_sign,
    sign_mol_int = select_path_sign_mol,
    Transp_cat = select_path_transp_ca,
    cell_grow_d = select_path_cell_grow,
    cell_comm = select_path_cell_comm,
    imm_syst = select_path_imm_syst,
    end_syst = select_path_end_syst,
    circ_syst = select_path_circ_syst,
    dig_syst = select_path_dig_syst,
    exc_syst = select_path_exc_syst,
    nerv_syst = select_path_ner_syst,
    sens_syst = select_path_sens_syst
  )

  if (KEGG_path == "KEGG_path") {
    # Every human pathway known to KEGG.
    pathways.list <- keggList("pathway", "hsa")
    pathway.codes <- sub("path:", "", names(pathways.list))
    path_titles <- do.call("cbind", list(pathways.list))
  } else if (KEGG_path %in% names(selectors)) {
    # The selectors never read their argument; NULL is only a placeholder.
    selection <- proc_path(selectors[[KEGG_path]](NULL))
    pathway.codes <- selection[[1]]
    path_titles <- selection[[2]]
  } else {
    stop(
      "unknown 'KEGG_path' value \"", KEGG_path,
      "\"; expected \"KEGG_path\" or one of: ",
      paste(names(selectors), collapse = ", "),
      call. = FALSE
    )
  }
  if (length(pathway.codes) == 0L) {
    stop("no KEGG pathway matched the selection \"", KEGG_path, "\"",
         call. = FALSE)
  }

  # lapply (not sapply) so the result is always a list, even when every
  # pathway happens to list the same number of genes.
  genes.by.pathway <- lapply(pathway.codes, function(pwid) {
    pw <- keggGet(pwid)
    # Keep every other element of the GENE field, starting with the first.
    pw[[1]]$GENE[c(TRUE, FALSE)]
  })
  names(genes.by.pathway) <- pathway.codes

  x <- org.Hs.egSYMBOL2EG
  mapped_genes <- mappedkeys(x)
  xx <- as.list(x[mapped_genes])

  # Flatten the symbol -> identifiers map once so each pathway needs a single
  # vectorised membership test instead of one intersect() per gene.
  all_ids <- unlist(xx, use.names = FALSE)
  owner <- rep(seq_along(xx), lengths(xx))

  top3 <- matrix(
    " ", nrow = length(xx), ncol = length(genes.by.pathway),
    dimnames = list(names(xx), names(genes.by.pathway))
  )
  for (k in seq_along(genes.by.pathway)) {
    hit <- unique(owner[all_ids %in% genes.by.pathway[[k]]])
    top3[hit, k] <- names(xx)[hit]
  }

  # Replace pathway codes by pathway titles wherever code and title line up.
  rownames(path_titles) <- sub("path:", "", rownames(path_titles))
  matched <- colnames(top3) == rownames(path_titles)
  colnames(top3)[matched] <- path_titles[matched, 1]
  top3
}
165
+
166
+
167
#' @title Get network data.
#' @description getNETdata creates a data frame with network data.
#' Network category can be filtered among: physical interactions,
#' co-localization, genetic interactions and shared protein domain.
#' @param network  variable. The user can use the following parameters
#' based on the network types to be used. PHint for Physical_interactions,
#' COloc for Co-localization, GENint for Genetic_interactions and
#' SHpd for Shared_protein_domains
#' @param organism organism==NULL default value is homo sapiens
#' @export
#' @importFrom SpidermiR SpidermiRquery_species SpidermiRquery_spec_networks SpidermiRdownload_net SpidermiRprepare_NET
#' @return dataframe with gene-gene (or protein-protein interactions), one
#' row per distinct pair, in the columns \code{m_shar_pro} and
#' \code{m2_shar_pro}
#' @examples
#' organism="Saccharomyces_cerevisiae"
#' netw<-getNETdata(network="SHpd",organism)
getNETdata <- function(network, organism = NULL) {
  # NOTE(review): `species` is not defined in this function; the call only
  # works if SpidermiRquery_species() never evaluates its argument - confirm.
  org_shar_pro <- SpidermiRquery_species(species)

  # NOTE(review): the value of `organism` is never inspected. NULL selects
  # row 6 of the species table and any non-NULL value selects row 9
  # (presumably Homo sapiens and Saccharomyces cerevisiae - confirm).
  organism_row <- if (is.null(organism)) 6 else 9
  organism_id <- org_shar_pro[organism_row, ]

  net_shar_prot <- SpidermiRquery_spec_networks(organismID = organism_id, network)
  out_net_shar_pro <- SpidermiRdownload_net(net_shar_prot)
  geneSymb_net_shar_pro <- SpidermiRprepare_NET(
    organismID = organism_id,
    data = out_net_shar_pro
  )

  ds_shar_pro <- do.call("rbind", geneSymb_net_shar_pro)
  data_shar_pro <- as.data.frame(
    ds_shar_pro[!duplicated(ds_shar_pro), , drop = FALSE]
  )

  # as.character() keeps the gene symbols even when the columns are factors
  # (cbind() on factors would otherwise store their integer codes).
  ss_shar_pro <- cbind(
    m_shar_pro = as.character(data_shar_pro$gene_symbolA),
    m2_shar_pro = as.character(data_shar_pro$gene_symbolB)
  )

  # drop = FALSE keeps a two-column matrix when a single pair remains.
  data_pr_shar_pro <- as.data.frame(
    ss_shar_pro[!duplicated(ss_shar_pro), , drop = FALSE]
  )
  colnames(data_pr_shar_pro) <- c("m_shar_pro", "m2_shar_pro")
  data_pr_shar_pro
}
203
+
204
+
205
+
206
+
207
+
0 208
new file mode 100644
... ...
@@ -0,0 +1,539 @@
1
+
2
+
3
+
4
# KEGG pathway titles of the carbohydrate metabolism category, each followed
# by the human species suffix. The argument is never read.
select_path_carb <- function(Carbohydrate) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Glycolysis / Gluconeogenesis",
    "Citrate cycle (TCA cycle)",
    "Pentose phosphate pathway",
    "Pentose and glucuronate interconversions",
    "Fructose and mannose metabolism",
    "Galactose metabolism",
    "Ascorbate and aldarate metabolism",
    "Starch and sucrose metabolism",
    "Amino sugar and nucleotide sugar metabolism",
    "Pyruvate metabolism",
    "Glyoxylate and dicarboxylate metabolism",
    "Propanoate metabolism",
    "Butanoate metabolism",
    "C5-Branched dibasic acid metabolism",
    "Inositol phosphate metabolism",
    "Enzymes",
    "Compounds with biological roles"
  )
  paste(titles, species)
}
26
+
27
# KEGG pathway titles of the energy metabolism category, each followed by the
# human species suffix. The argument is never read.
select_path_en <- function(Energy) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Oxidative phosphorylation",
    "Photosynthesis",
    "Photosynthesis - antenna proteins",
    "Carbon fixation in photosynthetic organisms",
    "Carbon fixation pathways in prokaryotes",
    "Methane metabolism",
    "Nitrogen metabolism",
    "Sulfur metabolism"
  )
  paste(titles, species)
}
40
+  
41
+
42
# KEGG pathway titles of the lipid metabolism category, each followed by the
# human species suffix. The argument is never read.
select_path_lip <- function(Lipid) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Fatty acid biosynthesis",
    "Fatty acid elongation",
    "Fatty acid degradation",
    "Synthesis and degradation of ketone bodies",
    "Cutin, suberine and wax biosynthesis",
    "Steroid biosynthesis",
    "Primary bile acid biosynthesis",
    "Secondary bile acid biosynthesis",
    "Steroid hormone biosynthesis",
    "Glycerolipid metabolism",
    "Glycerophospholipid metabolism",
    "Ether lipid metabolism",
    "Sphingolipid metabolism",
    "Arachidonic acid metabolism",
    "Linoleic acid metabolism",
    "alpha-Linolenic acid metabolism",
    "Biosynthesis of unsaturated fatty acids"
  )
  paste(titles, species)
}
65
+
66
+
67
+
68
+
69
# KEGG pathway titles of the amino acid metabolism category, each followed by
# the human species suffix. The argument is never read.
select_path_amn <- function(Aminoacid) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Alanine, aspartate and glutamate metabolism",
    "Glycine, serine and threonine metabolism",
    "Cysteine and methionine metabolism",
    "Valine, leucine and isoleucine degradation",
    "Valine, leucine and isoleucine biosynthesis",
    "Lysine biosynthesis",
    "Lysine degradation",
    "Arginine biosynthesis",
    "Arginine and proline metabolism",
    "Histidine metabolism",
    "Tyrosine metabolism",
    "Phenylalanine metabolism",
    "Tryptophan metabolism",
    "Phenylalanine, tyrosine and tryptophan biosynthesis",
    "beta-Alanine metabolism",
    "Taurine and hypotaurine metabolism",
    "Phosphonate and phosphinate metabolism",
    "Selenocompound metabolism",
    "Cyanoamino acid metabolism",
    "D-Glutamine and D-glutamate metabolism",
    "D-Arginine and D-ornithine metabolism",
    "D-Alanine metabolism",
    "Glutathione metabolism"
  )
  paste(titles, species)
}
98
+
99
# KEGG pathway titles of the glycan biosynthesis and metabolism category, each
# followed by the human species suffix. The argument is never read.
select_path_gly <- function(Glybio_met) {
  # The suffix used to be read from an undefined `species` variable, so the
  # function failed unless a global of that name happened to exist.
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "N-Glycan biosynthesis",
    "Various types of N-glycan biosynthesis",
    "Mucin type O-Glycan biosynthesis",
    "Other types of O-glycan biosynthesis",
    "Glycosaminoglycan biosynthesis - CS/DS",
    "Glycosaminoglycan biosynthesis - HS/Hep",
    "Glycosaminoglycan biosynthesis - KS",
    "Glycosaminoglycan degradation",
    "Glycosylphosphatidylinositol(GPI)-anchor biosynthesis",
    "Glycosphingolipid biosynthesis - lacto and neolacto series",
    "Glycosphingolipid biosynthesis - globo series",
    "Glycosphingolipid biosynthesis - ganglio series",
    "Lipopolysaccharide biosynthesis",
    "Peptidoglycan biosynthesis",
    "Other glycan degradation"
  )
  paste(titles, species)
}
118
+
119
+
120
+
121
# KEGG pathway titles of the cofactor and vitamin metabolism category, each
# followed by the human species suffix. The argument is never read.
select_path_cofa <- function(Cofa_vita_met) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Thiamine metabolism",
    "Riboflavin metabolism",
    "Vitamin B6 metabolism",
    "Nicotinate and nicotinamide metabolism",
    "Pantothenate and CoA biosynthesis",
    "Biotin metabolism",
    "Lipoic acid metabolism",
    "Folate biosynthesis",
    "One carbon pool by folate",
    "Retinol metabolism",
    "Porphyrin and chlorophyll metabolism",
    "Ubiquinone and other terpenoid-quinone biosynthesis"
  )
  paste(titles, species)
}
138
+
139
# KEGG pathway titles of the transcription category, each followed by the
# human species suffix. The argument is never read.
select_path_transc <- function(Transcription) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "RNA polymerase",
    "Basal transcription factors",
    "Spliceosome",
    "Transcription factors",
    "Transcription machinery"
  )
  paste(titles, species)
}
149
+
150
+
151
+
152
# KEGG pathway titles of the translation category, each followed by the human
# species suffix. The argument is never read.
select_path_transl <- function(Translation) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Ribosome",
    "Aminoacyl-tRNA biosynthesis",
    "RNA transport",
    "mRNA surveillance pathway",
    "Ribosome biogenesis in eukaryotes",
    "Ribosomal proteins",
    "Ribosome biogenesis",
    "Transfer RNA biogenesis",
    "Translation factors"
  )
  paste(titles, species)
}
166
+
167
# KEGG pathway titles of the folding, sorting and degradation category, each
# followed by the human species suffix. The argument is never read.
select_path_fold <- function(Folding_sorting_and_degradation) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Protein export",
    "Protein processing in endoplasmic reticulum",
    "SNARE interactions in vesicular transport",
    "Ubiquitin mediated proteolysis",
    "Sulfur relay system",
    "RNA degradation",
    "Chaperones and folding catalysts",
    "SNAREs",
    "Ubiquitin system",
    "Proteasome"
  )
  paste(titles, species)
}
182
+
183
+
184
+
185
+
186
# KEGG pathway titles of the replication and repair category, each followed
# by the human species suffix. The argument is never read.
select_path_repl <- function(Replication_and_repair) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "DNA replication",
    "Base excision repair",
    "Nucleotide excision repair",
    "Mismatch repair",
    "Homologous recombination",
    "Non-homologous end-joining",
    "Fanconi anemia pathway",
    "DNA replication proteins",
    "Chromosome",
    "DNA repair and recombination",
    "proteins"
  )
  paste(titles, species)
}
202
+
203
+
204
+
205
# KEGG pathway titles of the signal transduction category, each followed by
# the human species suffix. The argument is never read.
select_path_sign <- function(Signal_transduction) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Ras signaling pathway",
    "Rap1 signaling pathway",
    "MAPK signaling pathway",
    "ErbB signaling pathway",
    "Wnt signaling pathway",
    "Notch signaling pathway",
    "Hedgehog signaling pathway",
    "TGF-beta signaling pathway",
    "Hippo signaling pathway",
    "VEGF signaling pathway",
    "Jak-STAT signaling pathway",
    "NF-kappa B signaling pathway",
    "TNF signaling pathway",
    "HIF-1 signaling pathway",
    "FoxO signaling pathway",
    "Calcium signaling pathway",
    "Phosphatidylinositol signaling system",
    "Phospholipase D signaling pathway",
    "Sphingolipid signaling pathway",
    "cAMP signaling pathway",
    "cGMP-PKG signaling pathway",
    "PI3K-Akt signaling pathway",
    "AMPK signaling pathway",
    "mTOR signaling pathway"
  )
  paste(titles, species)
}
234
+
235
+
236
# KEGG pathway titles of the signaling molecules and interaction category,
# each followed by the human species suffix. The argument is never read.
select_path_sign_mol <- function(Signaling_molecules_and_interaction) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Neuroactive ligand-receptor interaction",
    "Cytokine-cytokine receptor interaction",
    "ECM-receptor interaction",
    "Cell adhesion molecules (CAMs)"
  )
  paste(titles, species)
}
245
+
246
+
247
# KEGG pathway titles of the transport and catabolism category, each followed
# by the human species suffix. The argument is never read.
select_path_transp_ca <- function(Transport_and_catabolism) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Endocytosis",
    "Phagosome",
    "Lysosome",
    "Peroxisome",
    "Regulation of autophagy"
  )
  paste(titles, species)
}
257
+
258
# KEGG pathway titles of the cell growth and death category, each followed by
# the human species suffix. The argument is never read.
select_path_cell_grow <- function(Cell_growth_and_death) {
  species <- c("- Homo sapiens (human)")
  titles <- c("Cell cycle", "Apoptosis", "p53 signaling pathway")
  paste(titles, species)
}
266
+
267
+
268
# KEGG pathway titles of the cellular community category, each followed by the
# human species suffix. The argument is never read.
select_path_cell_comm <- function(Cellular_community) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Focal adhesion",
    "Adherens junction",
    "Tight junction",
    "Gap junction",
    # The trailing space this title used to carry produced a double space
    # before the suffix, so it could not equal a single-spaced KEGG title.
    "Signaling pathways regulating pluripotency of stem cells"
  )
  paste(titles, species)
}
278
+
279
+
280
# KEGG pathway titles of the immune system category, each followed by the
# human species suffix. The argument is never read.
select_path_imm_syst <- function(Immune_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Hematopoietic cell lineage",
    "Complement and coagulation cascades",
    "Platelet activation",
    "Toll-like receptor signaling pathway",
    "Toll and Imd signaling pathway",
    "NOD-like receptor signaling pathway",
    "RIG-I-like receptor signaling pathway",
    "Cytosolic DNA-sensing pathway",
    "Natural killer cell mediated cytotoxicity",
    "Antigen processing and presentation",
    "T cell receptor signaling pathway",
    "B cell receptor signaling pathway",
    "Fc epsilon RI signaling pathway",
    "Fc gamma R-mediated phagocytosis",
    "Leukocyte transendothelial migration",
    "Intestinal immune network for IgA production",
    "Chemokine signaling pathway"
  )
  paste(titles, species)
}
303
+
304
+
305
+
306
+
307
# KEGG pathway titles of the endocrine system category, each followed by the
# human species suffix. The argument is never read.
select_path_end_syst <- function(Endocrine_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Insulin secretion",
    "Insulin signaling pathway",
    "Glucagon signaling pathway",
    "Regulation of lipolysis in adipocytes",
    "Adipocytokine signaling pathway",
    "PPAR signaling pathway",
    "GnRH signaling pathway",
    "Ovarian steroidogenesis",
    "Estrogen signaling pathway",
    "Progesterone-mediated oocyte maturation",
    "Prolactin signaling pathway",
    "Oxytocin signaling pathway",
    "Thyroid hormone synthesis",
    "Thyroid hormone signaling pathway",
    "Melanogenesis",
    "Renin secretion",
    "Renin-angiotensin system",
    "Aldosterone synthesis and secretion"
  )
  paste(titles, species)
}
332
+
333
+
334
# KEGG pathway titles of the circulatory system category, each followed by the
# human species suffix. The argument is never read.
select_path_circ_syst <- function(Circulatory_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Cardiac muscle contraction",
    "Adrenergic signaling in cardiomyocytes",
    "Vascular smooth muscle contraction"
  )
  paste(titles, species)
}
342
+
343
+
344
# KEGG pathway titles of the digestive system category, each followed by the
# human species suffix. The argument is never read.
select_path_dig_syst <- function(Digestive_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Salivary secretion",
    "Gastric acid secretion",
    "Pancreatic secretion",
    "Bile secretion",
    "Carbohydrate digestion and absorption",
    "Protein digestion and absorption",
    "Fat digestion and absorption",
    "Vitamin digestion and absorption",
    "Mineral absorption"
  )
  paste(titles, species)
}
359
+
360
+
361
+
362
# KEGG pathway titles of the excretory system category, each followed by the
# human species suffix. The argument is never read.
select_path_exc_syst <- function(Excretory_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Vasopressin-regulated water reabsorption",
    "Aldosterone-regulated sodium reabsorption",
    "Endocrine and other factor-regulated calcium reabsorption",
    "Proximal tubule bicarbonate reclamation",
    "Collecting duct acid secretion"
  )
  paste(titles, species)
}
374
+
375
+
376
# KEGG pathway titles of the nervous system category, each followed by the
# human species suffix. The argument is never read.
select_path_ner_syst <- function(Nervous_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Glutamatergic synapse",
    "GABAergic synapse",
    "Cholinergic synapse",
    "Dopaminergic synapse",
    "Serotonergic synapse",
    "Long-term potentiation",
    "Long-term depression",
    "Retrograde endocannabinoid signaling",
    "Synaptic vesicle cycle",
    "Neurotrophin signaling pathway"
  )
  paste(titles, species)
}
392
+
393
+
394
# KEGG pathway titles of the sensory system category, each followed by the
# human species suffix. The argument is never read.
select_path_sens_syst <- function(Sensory_system) {
  species <- c("- Homo sapiens (human)")
  titles <- c(
    "Phototransduction",
    "Olfactory transduction",
    "Taste transduction",
    "Inflammatory mediator regulation of TRP channels"
  )
  paste(titles, species)
}
403
+
404
+
405
+
406
#' @title Select the class of TCGA data
#' @description Keeps the columns of a gene expression matrix whose column
#' name carries the requested sample code in characters 14-15: code 01 for
#' \code{"tumor"}, codes 10 and above for \code{"normal"}. Any other label
#' leaves the matrix untouched.
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal)
#' @export
#' @return a gene expression matrix of the samples with specified label
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_filt,typesample="tumor")[,2]
SelectedSample <- function(Dataset, typesample) {
  # Numeric sample code read from positions 14-15 of every column name.
  sample_code <- function(m) {
    as.numeric(substr(colnames(m), 14, 15))
  }

  if (typesample == "tumor") {
    keep <- which(sample_code(Dataset) == 01)
    Dataset <- Dataset[, keep]
  }

  if (typesample == "normal") {
    keep <- which(sample_code(Dataset) >= 10)
    Dataset <- Dataset[, keep]
  }

  Dataset
}
426
+
427
+
428
#' @title Select the pathway pairs whose AUC exceeds a cut-off
#' @description Binds a list of AUC values into one column, sorts it in
#' decreasing order and keeps the leading entries whose value is greater than
#' the cut-off.
#' @param cutoff cut-off for AUC value
#' @param auc.df list of AUC value
#' @return a data frame with one row per retained entry (row names taken from
#' the names of \code{auc.df}); the first column is the logical result of the
#' comparison with the cut-off and the second column the AUC value. The data
#' frame has zero rows when no value exceeds the cut-off.
select_class <- function(auc.df, cutoff) {
  auc_matrix <- do.call("rbind", auc.df)
  ranking <- order(auc_matrix, decreasing = TRUE)
  # drop = FALSE keeps the row names even when a single AUC value is supplied.
  tmp_ordered <- as.data.frame(auc_matrix[ranking, , drop = FALSE])
  colnames(tmp_ordered) <- "pathway"

  # The expression is kept verbatim because as.data.frame() derives the name
  # of the first output column from it.
  er <- as.data.frame(tmp_ordered$pathway > cutoff)
  rownames(er) <- rownames(tmp_ordered)
  er[, 2] <- tmp_ordered$pathway

  # Values are sorted in decreasing order, so the retained entries are the
  # leading rows. seq_len() yields no row at all when nothing passes the
  # cut-off, whereas 1:0 used to return the first row.
  n_selected <- length(tmp_ordered$pathway[tmp_ordered$pathway > cutoff])
  er[seq_len(n_selected), ]
}
444
+
445
+
446
+
447
+
448
#' @title Process matrix TCGA data after the selection of pairwise pathway
#' @description Converts the score columns (third column onwards) to numeric,
#' labels every row with a cleaned "pathway1_pathway2" name built from the
#' first two columns, and keeps the rows whose label is also a row name of
#' \code{list_perf}.
#' @param measure matrix with measure of cross-talk among pathways
#' @param list_perf output of the function select_class
#' @return a gene expression matrix for case study 1
process_matrix <- function(measure, list_perf) {
  scoreMatrix <- as.data.frame(measure[, 3:ncol(measure)])
  scoreMatrix[] <- lapply(scoreMatrix, function(column) {
    as.numeric(as.character(column))
  })

  # Turn a pathway title into a compact label: spaces become underscores, the
  # species part is removed, then underscores and hyphens are dropped.
  clean_label <- function(titles, species_pattern) {
    label <- gsub(" ", "_", titles)
    label <- sub(species_pattern, "", label)
    label <- gsub("(human)", "", label, fixed = "TRUE")
    label <- gsub("_", "", label)
    gsub("-", "", label)
  }

  first_label <- clean_label(measure[, 1], '_-_Homo_sapiens_*')
  second_label <- clean_label(measure[, 2], '_-_Homo_sapiens_(human)*')

  rownames(scoreMatrix) <- paste(first_label, second_label, sep = "_")
  shared <- intersect(rownames(scoreMatrix), rownames(list_perf))
  scoreMatrix[shared, ]
}
474
+
475
+
476
+
477
# Converts the score columns (third column onwards) of a cross-talk table to
# numeric and labels every row with a cleaned "pathway1_pathway2" name built
# from the first two columns. Returns the resulting data frame.
process_matrix_cell_process <- function(measure_cell_process) {
  n_col <- ncol(measure_cell_process)
  score__cell_grow_d <- as.data.frame(measure_cell_process[, 3:n_col])
  score__cell_grow_d[] <- lapply(score__cell_grow_d, function(column) {
    as.numeric(as.character(column))
  })

  # Spaces become underscores, the species part is removed, then underscores
  # and hyphens are dropped.
  clean_label <- function(titles, species_pattern) {
    label <- gsub(" ", "_", titles)
    label <- sub(species_pattern, "", label)
    label <- gsub("(human)", "", label, fixed = "TRUE")
    label <- gsub("_", "", label)
    gsub("-", "", label)
  }

  first_label <- clean_label(measure_cell_process[, 1], '_-_Homo_sapiens_*')
  second_label <- clean_label(measure_cell_process[, 2],
                              '_-_Homo_sapiens_(human)*')

  rownames(score__cell_grow_d) <- paste(first_label, second_label, sep = "_")
  score__cell_grow_d
}
500
+
501
+
502
#' @title Select human KEGG pathways by title.
#' @description proc_path downloads the list of human KEGG pathways and keeps
#' the entries whose title is one of the titles in \code{mer}.
#' @param mer  output for example of select_path_carb
#' @export
#' @importFrom KEGGREST keggList
#' @return a list of two elements: the codes of the retained pathways (their
#' names with the "path:" prefix removed) and a one-column matrix of the
#' retained titles with the original names as row names (\code{NULL} when no
#' title matches)
proc_path <- function(mer) {
  pathways.list <- keggList("pathway", "hsa") ## returns the list of human pathways
  # Vectorised membership test; unlike the former 1:length() loop it also
  # copes with an empty pathway list.
  selected <- pathways.list[pathways.list %in% mer]
  pathway.codes <- sub("path:", "", names(selected))
  # One row per retained pathway, row names taken from the element names.
  titles <- do.call("rbind", as.list(selected))
  list(pathway.codes, titles)
}
524
+
525
+
526
# Returns the rows of `net_type` whose first gene (m_shar_pro) equals `x` and
# whose second gene (m2_shar_pro) is one of `currentPathway_genes`. Rows are
# grouped by second gene; the result is NULL when there is no such row.
overlap <- function(net_type, x, currentPathway_genes) {
  from_x <- net_type[which(net_type$m_shar_pro == x), ]
  partners <- intersect(from_x$m2_shar_pro, currentPathway_genes)
  per_partner <- lapply(partners, function(partner) {
    from_x[which(from_x$m2_shar_pro == partner), ]
  })
  do.call("rbind", per_partner)
}
539
+
0 540
new file mode 100644
... ...
@@ -0,0 +1,515 @@
1
#' @title Get human KEGG pathway data and network data in order to define the common genes.
#' @description path_net creates a list of network data for each human pathway.
#'   For a pathway, the network data keep only the interactions whose two
#'   genes both belong to that pathway.
#' @param net_type  network data as provided by getNETdata
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a list with one element per pathway: the interactions inside that
#'   pathway, or the string "0" when the pathway has none
#' @examples
#' lista_net<-path_net(pathway=path,net_type=netw)
path_net <- function(pathway, net_type) {
  lista_int <- list()
  for (k in seq_len(ncol(pathway))) {
    print(paste(k, "PATHWAY", colnames(pathway)[k]))
    currentPathway_genes <- pathway[, k]
    # Source genes of the network that belong to the current pathway.
    common1 <- intersect(net_type$m_shar_pro, currentPathway_genes)
    # overlap() keeps, for one source gene, the interactions whose partner
    # gene is in the pathway too. Looping over the source genes therefore
    # already covers every interaction with both ends in the pathway; the
    # former second pass over the partner genes re-used the same source
    # genes and only produced duplicates that were removed afterwards.
    edges <- lapply(common1, function(x) {
      overlap(net_type, x, currentPathway_genes)
    })
    # overlap() returns NULL when a gene has no partner in the pathway.
    edges <- Filter(Negate(is.null), edges)
    mago <- do.call("rbind", edges)
    if (is.null(mago)) {
      # Placeholder for pathways without any internal interaction.
      lista_int[[k]] <- "0"
    } else {
      lista_int[[k]] <- mago[!duplicated(mago), , drop = FALSE]
    }
    names(lista_int)[k] <- colnames(pathway)[k]
  }
  lista_int
}
49
+
50
+
51
#' @title Get human KEGG pathway data and output of path_net in order to define the common genes.
#' @description list_path_net creates a list of interacting genes for each human pathway.
#' @param lista_net  output of path_net
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a list of genes for each pathway (interacting genes belonging to
#'   that pathway); positions of pathways without network data are left unset
#' @examples
#' lista_netw<-path_net(pathway=path,net_type=netw)
#' list_path<-list_path_net(lista_net=lista_netw,pathway=path)
list_path_net <- function(lista_net, pathway) {
  v <- list()
  for (j in seq_along(lista_net)) {
    cf <- lista_net[[j]]
    # path_net() stores the string "0" for pathways without interactions:
    # skip such placeholders explicitly instead of relying on coercion.
    if (!is.list(cf)) {
      next
    }
    if (is.null(cf$m_shar_pro) && is.null(cf$m2_shar_pro)) {
      next
    }
    # as.character() turns factor columns into gene labels rather than
    # integer codes before the two columns are combined.
    genes <- unique(c(as.character(cf$m_shar_pro),
                      as.character(cf$m2_shar_pro)))
    shared <- intersect(genes, pathway[, j])
    # Report the pathway only when every interacting gene belongs to it.
    if (length(shared) == length(genes)) {
      print(paste("List of genes interacting in the same pathway:",
                  colnames(pathway)[j]))
      v[[j]] <- shared
      names(v)[j] <- colnames(pathway)[j]
    }
  }
  v
}
88
+
89
+
90
+
91
+
92
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input.
#' @description GE_matrix creates a matrix of gene expression for pathways given by the user.
#' @param DataMatrix  gene expression matrix (eg.TCGA data)
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a list with, for each pathway, a data frame of the expression
#'   levels of the genes belonging to that pathway
#' @examples
#' list_path_gene<-GE_matrix(DataMatrix=tumo[,1:2],pathway=path)
GE_matrix <- function(DataMatrix, pathway) {
  # Shorten the pathway names: the first blank becomes "_" and the KEGG
  # organism suffix is removed where it is still present verbatim.
  path_name <- sub(" ", "_", colnames(pathway))
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name,
                            fixed = TRUE)
  zz <- as.data.frame(DataMatrix)
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(rownames(zz), pathway[, k])
    if (length(common_genes) != 0) {
      # Report the pathway taken from the argument, not from a global `path`.
      print(colnames(pathway)[k])
      # drop = FALSE keeps a data frame (and the sample name) even when
      # DataMatrix has a single column.
      v[[k]] <- zz[common_genes, , drop = FALSE]
      names(v)[k] <- colnames(pathway)[k]
    }
  }
  v
}
127
+
128
+
129
+
130
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the mean gene expression for only pathways given in input.
#' @description matrix_plot creates a matrix of mean gene expression for pathways given by the user.
#' @param DataMatrix  gene expression matrix (eg.TCGA data)
#' @param pathway  pathway data as provided by getKEGGdata
#' @export
#' @return a matrix with genes in rows and pathways in columns: each cell is
#'   the mean expression of the gene when it belongs to the pathway and 0
#'   otherwise
#' @examples
#' list_path_plot<-matrix_plot(DataMatrix=tumo[,1:2],pathway=path)
matrix_plot <- function(DataMatrix, pathway) {
  # Shorten the pathway names: the first blank becomes "_" and the KEGG
  # organism suffix is removed where it is still present verbatim.
  path_name <- sub(" ", "_", colnames(pathway))
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name,
                            fixed = TRUE)
  # Mean expression of every gene across the samples.
  zz <- as.data.frame(rowMeans(DataMatrix))
  PEAmatrix <- matrix(0, nrow(DataMatrix), ncol(pathway))
  rownames(PEAmatrix) <- rownames(DataMatrix)
  colnames(PEAmatrix) <- colnames(pathway)
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(rownames(zz), pathway[, k])
    if (length(common_genes) != 0) {
      # Report the pathway taken from the argument, not from a global `path`.
      print(colnames(pathway)[k])
      PEAmatrix[common_genes, k] <- zz[common_genes, ]
    }
  }
  # Genes outside every pathway have an all-zero row and are removed.
  # NOTE(review): a gene whose values sum to <= 0 is removed as well;
  # confirm this is intended for data with non-positive expression.
  # drop = FALSE keeps the result a matrix for a single row or column.
  PEAmatrix[which(rowSums(PEAmatrix) > 0), , drop = FALSE]
}
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain the data for a plot of the pathway cross-talk.
#' @description plotting_cross_talk computes the correlation among genes and
#'   the groups of genes belonging to each pathway given by the user.
#' @param DataMatrix  gene expression matrix (eg.TCGA data)
#' @param pathway  pathway data as provided by getKEGGdata
#' @param path_matrix  output of the function matrix_plot
#' @export
#' @return a list with the gene correlation matrix (corr) and, for each
#'   pathway, the positions of its genes in that matrix (gruppi); the list
#'   can be passed to a network plot of the pathway cross-talk
#' @examples
#' mt<-plotting_cross_talk(DataMatrix=tumo[,1:2],pathway=path,path_matrix=list_path_plot)
plotting_cross_talk <- function(DataMatrix, pathway, path_matrix) {
  # Clean the pathway names once, before the loop: repeating the
  # substitution at every iteration replaced one more blank each time, so
  # the names depended on the number of pathways.
  path_name <- sub(" ", "_", colnames(pathway))
  colnames(pathway) <- gsub(" - Homo sapiens (human)", "", path_name,
                            fixed = TRUE)
  zz <- as.data.frame(rowMeans(DataMatrix))
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    common_genes <- intersect(rownames(zz), pathway[, k])
    if (length(common_genes) != 0) {
      # Report the pathway taken from the argument, not from a global `path`.
      print(colnames(pathway)[k])
      v[[k]] <- common_genes
      names(v)[k] <- colnames(pathway)[k]
    }
  }
  # Correlation between genes, computed across the pathway columns.
  dc <- cor(t(path_matrix))
  vv <- list()
  for (k in seq_along(v)) {
    # Positions, in the correlation matrix, of the genes of pathway k.
    in_corr <- intersect(rownames(dc), v[[k]])
    vv[[k]] <- match(in_corr, rownames(dc))
    names(vv)[k] <- colnames(pathway)[k]
  }
  list(corr = dc, gruppi = vv)
}
216
+
217
+
218
+
219
+
220
#' @title For TCGA data get human pathway data and creates a matrix with the average of genes for each pathway.
#' @description average creates a matrix with a summarized value (the mean
#'   expression of the pathway genes) for each pathway and each sample
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with pathways in rows and samples in columns; a pathway
#'   sharing no gene with dataFilt gets NaN
#' @examples
#' score_mean<-average(dataFilt=tumo[,1:2],path)
average <- function(dataFilt, pathway) {
  PEAmatrix <- matrix(0, ncol(pathway), ncol(dataFilt))
  rownames(PEAmatrix) <- colnames(pathway)
  colnames(PEAmatrix) <- colnames(dataFilt)
  for (k in seq_len(nrow(PEAmatrix))) {
    common_genes <- intersect(rownames(dataFilt), pathway[, k])
    # drop = FALSE keeps two dimensions when only one gene is shared;
    # otherwise colSums() fails on the resulting plain vector.
    pathway_expr <- dataFilt[common_genes, , drop = FALSE]
    PEAmatrix[k, ] <- colSums(pathway_expr) / length(common_genes)
  }
  PEAmatrix
}
251
+
252
+
253
+
254
+  
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
#' @title For TCGA data get human pathway data and creates a measure of cross-talk among pathways
#' @description euc_dist_crtlk creates a matrix with euclidean distance for pairwise pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the names of the two pathways, the remaining columns hold, for
#'   each sample, the distance between their average expression values
#' @examples
#' score_euc_dista<-euc_dist_crtlk(dataFilt=tumo[,1:2],path)
euc_dist_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)
  # All possible pairs of pathways, one pair per row.
  df <- t(combn(rownames(PEAmatrix), 2))
  # Matrix receiving the distances; its columns are the samples.
  ma_d <- matrix(0, nrow(df), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  # seq_len() keeps the loop empty when dataFilt has no sample column.
  for (p in seq_len(ncol(PEAmatrix))) {
    # dist() lists the pairs in the same order as combn() above.
    ma_d[, p] <- as.numeric(dist(PEAmatrix[, p]))
  }
  # Prefix the distances with the labels of the pathway pairs.
  cbind(df, ma_d)
}
287
+
288
+
289
+
290
+
291
#' @title For TCGA data get human pathway data and creates a measure of standard deviations among pathways
#' @description st_dv creates a matrix with standard deviation for pathways
#' @param DataMatrix TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with pathways in rows and samples in columns holding the
#'   standard deviation of the pathway genes; NA when a pathway shares fewer
#'   than two genes with DataMatrix
#' @examples
#' stand_dev<-st_dv(DataMatrix=tumo[,1:2],pathway=path)
st_dv <- function(DataMatrix, pathway) {
  PEAmatrix_sd <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix_sd) <- colnames(pathway)
  colnames(PEAmatrix_sd) <- colnames(DataMatrix)
  for (k in seq_len(nrow(PEAmatrix_sd))) {
    print(colnames(pathway)[k])
    common_genes <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE keeps two dimensions when only one gene is shared;
    # otherwise apply() fails on the resulting plain vector.
    pathway_expr <- DataMatrix[common_genes, , drop = FALSE]
    # Standard deviation of the pathway genes, sample by sample.
    PEAmatrix_sd[k, ] <- apply(pathway_expr, 2, sd)
  }
  PEAmatrix_sd
}
321
+
322
+
323
+
324
+
325
+
326
+
327
#' @title For TCGA data get human pathway data and creates a measure of discriminating score among pathways
#' @description ds_score_crtlk creates a matrix with discriminating score for pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a matrix with one row per pair of pathways: the first two columns
#'   hold the names of the two pathways, the remaining columns hold the
#'   discriminating score of the pair for each sample
#' @examples
#' cross_talk_st_dv<-ds_score_crtlk(dataFilt=tumo[,1:2],pathway=path)
ds_score_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)
  PEAmatrix_sd <- st_dv(dataFilt, pathway)

  # Numerator: distance between the average values of every pathway pair.
  # dist() lists the pairs in the same order as combn() below.
  pairs_mean <- t(combn(rownames(PEAmatrix), 2))
  ma_d <- matrix(0, nrow(pairs_mean), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  # seq_len() keeps the loops empty when dataFilt has no sample column.
  for (p in seq_len(ncol(PEAmatrix))) {
    ma_d[, p] <- as.numeric(dist(PEAmatrix[, p]))
  }

  # Denominator: sum of the standard deviations of the two pathways.
  df <- t(combn(rownames(PEAmatrix_sd), 2))
  ma <- matrix(0, nrow(df), ncol(PEAmatrix_sd))
  colnames(ma) <- colnames(PEAmatrix_sd)
  for (p in seq_len(ncol(PEAmatrix_sd))) {
    patients <- PEAmatrix_sd[, p]
    # Each row of df names the two pathways whose deviations are added.
    ma[, p] <- apply(df, 1, function(x) sum(patients[x]))
  }

  # Discriminating score: |M1 - M2| / (S1 + S2).
  score <- ma_d / ma
  cbind(df, score)
}
362
+
363
+
364
+
365
+#' @title SVM classification for each feature
366
+#' @description svm class creates a list with auc value  
367
+#' @param TCGA_matrix gene expression matrix
368
+#' @param nfs nfs split data into a training  and test set
369
+#' @param tumour barcode samples for a class
370
+#' @param normal barcode samples for another class
371
+#' @export
372
+#' @importFrom e1071 tune svm 
373
+#' @importFrom ROCR prediction performance 
374
+#' @importFrom  grDevices rainbow
375
+#' @return a list with AUC value for pairwise pathway 
376
+#' @examples
377
+#' nf <- 60
378
+#' res_class<-svm_classification(TCGA_matrix=score_euc_dist,nfs=nf,
379
+#' normal=colnames(norm[,1:10]),tumour=colnames(tumo[,1:10]))
380
+svm_classification<-function(TCGA_matrix,tumour,normal,nfs){
381
+  #library("e1071")
382
+  #library(ROCR)
383
+
384
+  scoreMatrix <- as.data.frame(TCGA_matrix[,3:ncol(TCGA_matrix)])
385
+  scoreMatrix <-as.data.frame(scoreMatrix)
386
+  for( i in 1: ncol(scoreMatrix)){
387
+    scoreMatrix[,i] <- as.numeric(as.character(scoreMatrix[,i]))
388
+  }
389
+
390
+  TCGA_matrix[,1] <- gsub(" ", "_", TCGA_matrix[,1])
391
+  d<-sub('_-_Homo_sapiens_*', '', TCGA_matrix[,1])
392
+  #d_pr<-sub(')*', '', DataMatrix[,1])
393
+  
394
+  d_pr<- gsub("(human)", "", d, fixed="TRUE")
395
+  d_pr <- gsub("_", "", d_pr)
396
+  d_pr <- gsub("-", "", d_pr)
397
+  
398
+  TCGA_matrix[,2] <- gsub(" ", "_", TCGA_matrix[,2])
399
+  d2<-sub('_-_Homo_sapiens_(human)*', '', TCGA_matrix[,2])
400
+  d_pr2<- gsub("(human)", "", d2, fixed="TRUE")
401
+  d_pr2 <- gsub("_", "", d_pr2)
402
+  d_pr2 <- gsub("-", "", d_pr2)
403
+  
404
+  PathwaysPair <- paste( as.matrix(d_pr), as.matrix(d_pr2),sep="_" )
405
+  
406
+  rownames(scoreMatrix) <-PathwaysPair
407
+
408
+  
409
+  tDataMatrix<-as.data.frame(t(scoreMatrix))
410
+  #tDataMatrix$Target[,1]<-0
411
+  
412
+  tDataMatrix<-cbind(Target=0,tDataMatrix )
413
+
414
+  tum<-intersect(rownames(tDataMatrix),tumour)
415
+  nor<-intersect(rownames(tDataMatrix),normal)
416
+  #tDataMatrix$
417
+    
418
+  Dataset_g1<-tDataMatrix[nor,]
419
+  Dataset_g3<- tDataMatrix[tum,]
420
+    
421
+  
422
+#training=read.table('C:/Users/UserInLab05/Desktop/trai.txt',header = TRUE)
423
+#testset=read.table('C:/Users/UserInLab05/Desktop/test.txt',header = TRUE)
424
+
425
+  Dataset_g1$Target <- 0
426
+  Dataset_g3$Target<-1
427
+#Dataset_g3 <- Dataset_g3[Dataset_g3$Target <- 1, ]
428
+  
429
+tab_g1_training <- sample(rownames(Dataset_g1),round(nrow(Dataset_g1) / 100 * nfs ))
430
+tab_g3_training <- sample(rownames(Dataset_g3),round(nrow(Dataset_g3) / 100 * nfs ))
431
+tab_g1_testing <- setdiff(rownames(Dataset_g1),tab_g1_training)
432
+tab_g3_testing <- setdiff(rownames(Dataset_g3),tab_g3_training)
433
+
434
+FR<-intersect(rownames(Dataset_g1),tab_g1_training)
435
+
436
+#rownames(Dataset_g1)<-Dataset_g1[,1]
437
+G1<-Dataset_g1[FR,]
438
+
439
+FR1<-intersect(rownames(Dataset_g3),tab_g3_training)
440
+#rownames(Dataset_g3)<-Dataset_g3$ID
441
+
442
+G3<-Dataset_g3[FR1,]
443
+training<-rbind(G1,G3)
444
+
445
+inter1<-intersect(rownames(Dataset_g1),tab_g1_testing)
446
+#rownames(Dataset_g1)<-Dataset_g1$ID
447
+
448
+G1_testing<-Dataset_g1[inter1,]
449
+
450
+inter2<-intersect(rownames(Dataset_g3),tab_g3_testing)
451
+#rownames(Dataset_g3)<-Dataset_g3$ID
452
+G3_testing<-Dataset_g3[inter2,]
453
+
454
+testing<-rbind(G1_testing,G3_testing)
455
+
456
+x <- subset(training, select=-Target)
457
+y <- training$Target
458
+#testing[,2]<-NULL
459
+z<-subset(testing, select=-Target)
460
+
461
+zi<-testing$Target
462
+
463
+auc.df<-list()
464
+svm_model_after_tune_COMPL<-list()
465
+for( k in 2: ncol(training)){
466
+  print(colnames(training)[k])
467
+  svm_tune <- tune(svm, train.x=x, train.y=y, 
468
+                   kernel="radial", ranges=list(cost=10^(-1:2), gamma=c(.5,1,2)),cross=10)
469
+  #print(svm_tune)
470
+  
471
+  svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters$cost, gamma=svm_tune$best.parameters$gamma,cross=10,probability = TRUE)
472
+  
473
+  
474
+  #svm_model_after_tune <- svm(Target ~ ., data=training[,c(1,k)], kernel="radial", cost=svm_tune$best.parameters[1], gamma=svm_tune$best.parameters[2],cross=10,probability = TRUE)
475
+  #summary(svm_model_after_tune)
476
+
477
+  j=k-1
478
+  z2=z[,j]
479
+  z3<-as.data.frame(z2)
480
+  #rownames(z3)<-rownames(z)
481
+  #colnames(z3)<-as.character(paste("X",j,sep = ""))
482
+  colnames(z3)<-colnames(z)[j]
483