Browse code

replace name

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/StarBioTrek@122485 bc3139a8-67e5-0310-9ffc-ced21a209358

Claudia Cava authored on 16/10/2016 09:58:12
Showing 42 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+Package: StarBioTrek
2
+Type: Package
3
+Title: StarBioTrek
4
+Version: 0.99.28
5
+Date: 2016-10-16
6
+Author: Claudia Cava,
7
+    Isabella Castiglioni
8
+Maintainer: Claudia Cava <claudia.cava@ibfm.cnr.it>
9
+Depends:
10
+    R (>= 3.3)
11
+Imports:
12
+    SpidermiR,
13
+	KEGGREST,
14
+	org.Hs.eg.db,
15
+	AnnotationDbi,
16
+	e1071,
17
+	ROCR,
18
+	grDevices
19
+Description: StarBioTrek provides methodologies to measure pathway activity and cross-talk among pathways, also integrating information from network data.
20
+License: GPL (>= 3)
21
+biocViews: GeneRegulation,
22
+    Network,
23
+	Pathways,
24
+	KEGG
25
+Suggests:
26
+    BiocStyle,
27
+    knitr,
28
+    rmarkdown,
29
+    testthat,
30
+	devtools,
31
+	roxygen2,
32
+	qgraph,
33
+	png,
34
+	grid
35
+VignetteBuilder: knitr
36
+LazyData: true
37
+URL: https://github.com/claudiacava/StarBioTrek
38
+BugReports: https://github.com/claudiacava/StarBioTrek/issues
39
+RoxygenNote: 5.0.1
0 40
\ No newline at end of file
1 41
new file mode 100644
... ...
@@ -0,0 +1,27 @@
1
+# Generated by roxygen2: do not edit by hand
2
+
3
+export(GE_matrix)
4
+export(SelectedSample)
5
+export(average)
6
+export(ds_score_crtlk)
7
+export(euc_dist_crtlk)
8
+export(getKEGGdata)
9
+export(getNETdata)
10
+export(list_path_net)
11
+export(plotting_cross_talk)
12
+export(proc_path)
13
+export(st_dv)
14
+export(svm_classification)
15
+importFrom(AnnotationDbi,as.list)
16
+importFrom(AnnotationDbi,mappedkeys)
17
+importFrom(KEGGREST,keggGet)
18
+importFrom(KEGGREST,keggList)
19
+importFrom(ROCR,performance)
20
+importFrom(ROCR,prediction)
21
+importFrom(SpidermiR,SpidermiRdownload_net)
22
+importFrom(SpidermiR,SpidermiRquery_spec_networks)
23
+importFrom(SpidermiR,SpidermiRquery_species)
24
+importFrom(e1071,svm)
25
+importFrom(e1071,tune)
26
+importFrom(grDevices,rainbow)
27
+importFrom(org.Hs.eg.db,org.Hs.egSYMBOL2EG)
0 28
new file mode 100644
... ...
@@ -0,0 +1,6 @@
1
+  StarBioTrek 
2
+----------------------------------------------------------------
3
+  FIRST VERSION - FEATURES
4
+
5
+* getKEGGdata	Searching by KEGG data.
6
+* getNETdata	Searching by network data.
0 7
new file mode 100644
... ...
@@ -0,0 +1,64 @@
1
+#' Download data
2
+#'
3
+#' StarBioTrek allows you to Download data of samples from StarBioTrek
4
+#'
5
+#' The function you're most likely to need from \pkg{StarBioTrek} is
6
+#' \code{path_star}
7
+#' Otherwise refer to the vignettes to see
8
+#' how to format the documentation.
9
+#'
10
+#' @docType package
11
+#' @name StarBioTrek
12
+NULL
13
+
14
+#' Pathway data from KEGG
15
+#' @docType data
16
+#' @keywords internal
17
+#' @name path
18
+#' @format A data frame with rows and  variables
19
+NULL
20
+
21
+#' network data
22
+#' @docType data
23
+#' @keywords internal
24
+#' @name netw
25
+#' @format A data frame with  rows and variables
26
+NULL
27
+
28
+
29
+
30
+
31
+#' TCGA data
32
+#' @docType data
33
+#' @keywords internal
34
+#' @name Data_CANCER_normUQ_filt
35
+#' @format A data frame with rows and variables
36
+NULL
37
+
38
+#' Score Matrix of pairwise pathway using euclidean distance
39
+#' @docType data
40
+#' @keywords internal
41
+#' @name score_euc_dist
42
+#' @format A data frame with rows and variables
43
+NULL
44
+
45
+#' TCGA data with normal samples
46
+#' @docType data
47
+#' @keywords internal
48
+#' @name norm
49
+#' @format A data frame with rows and variables
50
+NULL
51
+
52
+#' TCGA data with tumour samples
53
+#' @docType data
54
+#' @keywords internal
55
+#' @name tumo
56
+#' @format A data frame with rows and variables
57
+NULL
58
+
59
+#' A matrix of gene expression for pathways given by the user. 
60
+#' @docType data
61
+#' @keywords internal
62
+#' @name list_path_plot
63
+#' @format A data frame with rows and variables
64
+NULL
0 65
\ No newline at end of file
1 66
new file mode 100644
... ...
@@ -0,0 +1,204 @@
1
#' @title Get human KEGG pathway data.
#' @description getKEGGdata builds a character matrix describing the genes of
#' human KEGG pathways. Columns are the pathways and rows are the gene symbols
#' mapped in \code{org.Hs.egSYMBOL2EG}; a cell holds the gene symbol when the
#' gene belongs to the pathway and \code{" "} otherwise.
#' @param KEGG_path  character string selecting the group of pathways. One of
#' "Carb_met", "Ener_met", "Lip_met", "Amn_met", "Gly_bio_met", "Cof_vit_met",
#' "Transcript", "Transl", "Fold_degr", "Repl_repair", "sign_transd",
#' "sign_mol_int", "Transp_cat", "cell_grow_d", "cell_comm", "imm_syst",
#' "end_syst", "circ_syst", "dig_syst", "exc_syst", "nerv_syst", "sens_syst",
#' or "KEGG_path" for every human pathway returned by KEGG.
#' @export
#' @importFrom KEGGREST keggList keggGet
#' @importFrom org.Hs.eg.db org.Hs.egSYMBOL2EG
#' @importFrom AnnotationDbi mappedkeys as.list
#' @return character matrix with human pathway data (genes x pathways)
#' @examples
#' path<-getKEGGdata(KEGG_path="Transcript")
getKEGGdata <- function(KEGG_path) {
  # One selector per supported key; each returns the pathway titles of a group.
  selectors <- list(
    Carb_met = select_path_carb,
    Ener_met = select_path_en,
    Lip_met = select_path_lip,
    Amn_met = select_path_amn,
    Gly_bio_met = select_path_gly,
    Cof_vit_met = select_path_cofa,
    Transcript = select_path_transc,
    Transl = select_path_transl,
    Fold_degr = select_path_fold,
    Repl_repair = select_path_repl,
    sign_transd = select_path_sign,
    sign_mol_int = select_path_sign_mol,
    Transp_cat = select_path_transp_ca,
    cell_grow_d = select_path_cell_grow,
    cell_comm = select_path_cell_comm,
    imm_syst = select_path_imm_syst,
    end_syst = select_path_end_syst,
    circ_syst = select_path_circ_syst,
    dig_syst = select_path_dig_syst,
    exc_syst = select_path_exc_syst,
    nerv_syst = select_path_ner_syst,
    sens_syst = select_path_sens_syst
  )

  if (!is.character(KEGG_path) || length(KEGG_path) != 1L || is.na(KEGG_path)) {
    stop("'KEGG_path' must be a single character string", call. = FALSE)
  }

  if (KEGG_path == "KEGG_path") {
    # Every human pathway: titles as a one-column matrix, KEGG ids as row names.
    pathways.list <- keggList("pathway", "hsa")
    pathway.codes <- sub("path:", "", names(pathways.list))
    a <- matrix(pathways.list, ncol = 1,
                dimnames = list(names(pathways.list), NULL))
  } else if (KEGG_path %in% names(selectors)) {
    # The selectors never read their argument, so NULL is passed as filler.
    selected <- proc_path(selectors[[KEGG_path]](NULL))
    pathway.codes <- selected[[1]]
    a <- selected[[2]]
  } else {
    stop("unknown 'KEGG_path' value \"", KEGG_path, "\"; expected one of: ",
         paste(c(names(selectors), "KEGG_path"), collapse = ", "),
         call. = FALSE)
  }

  if (length(pathway.codes) == 0L) {
    stop("no KEGG pathway matched the requested group \"", KEGG_path, "\"",
         call. = FALSE)
  }

  # lapply (not sapply) so the result is always a list, even when every
  # pathway happens to have the same number of genes.
  genes.by.pathway <- lapply(pathway.codes, function(pwid) {
    pw <- keggGet(pwid)
    # GENE alternates identifier / description; keep the identifiers.
    pw[[1]]$GENE[c(TRUE, FALSE)]
  })
  names(genes.by.pathway) <- pathway.codes

  x <- org.Hs.egSYMBOL2EG
  xx <- as.list(x[mappedkeys(x)])
  symbols <- names(xx)

  # Flatten symbol -> identifiers once so each pathway needs one %in% lookup.
  gene_ids <- unlist(xx, use.names = FALSE)
  id_owner <- rep(seq_along(xx), lengths(xx))

  top3 <- matrix(" ", nrow = length(xx), ncol = length(genes.by.pathway),
                 dimnames = list(symbols, names(genes.by.pathway)))
  for (k in seq_along(genes.by.pathway)) {
    rows <- unique(id_owner[gene_ids %in% genes.by.pathway[[k]]])
    top3[rows, k] <- symbols[rows]
  }

  # Replace the KEGG codes used as column names by the pathway titles.
  rownames(a) <- sub("path:", "", rownames(a))
  idx <- match(colnames(top3), rownames(a))
  found <- !is.na(idx)
  colnames(top3)[found] <- as.vector(a)[idx[found]]
  top3
}
162
+
163
+
164
#' @title Get network data.
#' @description getNETdata downloads network data through SpidermiR.
#' Network category can be filtered among: physical interactions,
#' co-localization, genetic interactions and shared protein domain.
#' @param network  variable. The user can use the following parameters
#' based on the network types to be used. PHint for Physical_interactions,
#' COloc for Co-localization, GENint for Genetic_interactions and
#' SHpd for Shared_protein_domains
#' @param organism organism==NULL default value is homo sapiens
#' @export
#' @importFrom SpidermiR SpidermiRquery_species SpidermiRquery_spec_networks SpidermiRdownload_net
#' @return dataframe with gene-gene (or protein-protein interactions)
#' @examples
#' organism="Saccharomyces_cerevisiae"
#' netw<-getNETdata(network="SHpd",organism)
getNETdata <- function(network, organism = NULL) {
  species_table <- SpidermiRquery_species(species)
  # Row 6 of the species table is used when no organism is given, row 9
  # otherwise.
  # NOTE(review): the value of `organism` is only tested for NULL, so every
  # non-NULL organism selects row 9 -- confirm this is intended.
  organism_row <- if (is.null(organism)) 6 else 9
  available_networks <- SpidermiRquery_spec_networks(
    organismID = species_table[organism_row, ],
    network
  )
  SpidermiRdownload_net(available_networks)
}
200
+
201
+
202
+
203
+
204
+
0 205
new file mode 100644
... ...
@@ -0,0 +1,534 @@
1
+#overlap <- function(net_type,x,currentPathway_genes){
2
+ # de<-net_type[which(net_type$m_shar_pro==x),]
3
+#  fr<-intersect(de$m2_shar_pro,currentPathway_genes)
4
+ # go=list()
5
+  #if(length(fr)!=0)    {
6
+   # for (i in 1:length(fr)){
7
+   #   de2<-de[which(de$m2_shar_pro==fr[i]),]
8
+    #  go[[i]]<-de2
9
+    #}
10
+  #}            
11
+#  dst<-do.call("rbind", go)
12
+ # return(dst)
13
+#}
14
+
15
+
16
# Pathway titles of the carbohydrate-metabolism group (key "Carb_met" in
# getKEGGdata). `Carbohydrate` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_carb <- function(Carbohydrate) {
  titles <- c(
    "Glycolysis / Gluconeogenesis",
    "Citrate cycle (TCA cycle)",
    "Pentose phosphate pathway",
    "Pentose and glucuronate interconversions",
    "Fructose and mannose metabolism",
    "Galactose metabolism",
    "Ascorbate and aldarate metabolism",
    "Starch and sucrose metabolism",
    "Amino sugar and nucleotide sugar metabolism",
    "Pyruvate metabolism",
    "Glyoxylate and dicarboxylate metabolism",
    "Propanoate metabolism",
    "Butanoate metabolism",
    "C5-Branched dibasic acid metabolism",
    "Inositol phosphate metabolism"
  )
  paste(titles, "- Homo sapiens (human)")
}
36
+
37
# Pathway titles of the energy-metabolism group (key "Ener_met" in
# getKEGGdata). `Energy` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_en <- function(Energy) {
  titles <- c(
    "Oxidative phosphorylation",
    "Photosynthesis",
    "Photosynthesis - antenna proteins",
    "Carbon fixation in photosynthetic organisms",
    "Carbon fixation pathways in prokaryotes",
    "Methane metabolism",
    "Nitrogen metabolism",
    "Sulfur metabolism"
  )
  paste(titles, "- Homo sapiens (human)")
}
50
+  
51
+
52
# Pathway titles of the lipid-metabolism group (key "Lip_met" in
# getKEGGdata). `Lipid` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_lip <- function(Lipid) {
  titles <- c(
    "Fatty acid biosynthesis",
    "Fatty acid elongation",
    "Fatty acid degradation",
    "Synthesis and degradation of ketone bodies",
    "Cutin, suberine and wax biosynthesis",
    "Steroid biosynthesis",
    "Primary bile acid biosynthesis",
    "Secondary bile acid biosynthesis",
    "Steroid hormone biosynthesis",
    "Glycerolipid metabolism",
    "Glycerophospholipid metabolism",
    "Ether lipid metabolism",
    "Sphingolipid metabolism",
    "Arachidonic acid metabolism",
    "Linoleic acid metabolism",
    "alpha-Linolenic acid metabolism",
    "Biosynthesis of unsaturated fatty acids"
  )
  paste(titles, "- Homo sapiens (human)")
}
75
+
76
+
77
+
78
+
79
# Pathway titles of the amino-acid-metabolism group (key "Amn_met" in
# getKEGGdata). `Aminoacid` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_amn <- function(Aminoacid) {
  titles <- c(
    "Alanine, aspartate and glutamate metabolism",
    "Glycine, serine and threonine metabolism",
    "Cysteine and methionine metabolism",
    "Valine, leucine and isoleucine degradation",
    "Valine, leucine and isoleucine biosynthesis",
    "Lysine biosynthesis",
    "Lysine degradation",
    "Arginine biosynthesis",
    "Arginine and proline metabolism",
    "Histidine metabolism",
    "Tyrosine metabolism",
    "Phenylalanine metabolism",
    "Tryptophan metabolism",
    "Phenylalanine, tyrosine and tryptophan biosynthesis",
    "beta-Alanine metabolism",
    "Taurine and hypotaurine metabolism",
    "Phosphonate and phosphinate metabolism",
    "Selenocompound metabolism",
    "Cyanoamino acid metabolism",
    "D-Glutamine and D-glutamate metabolism",
    "D-Arginine and D-ornithine metabolism",
    "D-Alanine metabolism",
    "Glutathione metabolism"
  )
  paste(titles, "- Homo sapiens (human)")
}
108
+
109
# Pathway titles of the glycan biosynthesis and metabolism group (key
# "Gly_bio_met" in getKEGGdata). `Glybio_met` is a placeholder: the body
# never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
# The species suffix is defined locally: the function previously relied on a
# `species` variable that it never created, so the call failed unless a
# global of that name happened to exist.
select_path_gly <- function(Glybio_met) {
  species <- "- Homo sapiens (human)"
  titles <- c(
    "N-Glycan biosynthesis",
    "Various types of N-glycan biosynthesis",
    "Mucin type O-Glycan biosynthesis",
    "Other types of O-glycan biosynthesis",
    "Glycosaminoglycan biosynthesis - CS/DS",
    "Glycosaminoglycan biosynthesis - HS/Hep",
    "Glycosaminoglycan biosynthesis - KS",
    "Glycosaminoglycan degradation",
    "Glycosylphosphatidylinositol(GPI)-anchor biosynthesis",
    "Glycosphingolipid biosynthesis - lacto and neolacto series",
    "Glycosphingolipid biosynthesis - globo series",
    "Glycosphingolipid biosynthesis - ganglio series",
    "Lipopolysaccharide biosynthesis",
    "Peptidoglycan biosynthesis",
    "Other glycan degradation"
  )
  paste(titles, species)
}
128
+
129
+
130
+
131
# Pathway titles of the cofactor and vitamin metabolism group.
# `Cofa_vita_met` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_cofa <- function(Cofa_vita_met) {
  titles <- c(
    "Thiamine metabolism",
    "Riboflavin metabolism",
    "Vitamin B6 metabolism",
    "Nicotinate and nicotinamide metabolism",
    "Pantothenate and CoA biosynthesis",
    "Biotin metabolism",
    "Lipoic acid metabolism",
    "Folate biosynthesis",
    "One carbon pool by folate",
    "Retinol metabolism",
    "Porphyrin and chlorophyll metabolism",
    "Ubiquinone and other terpenoid-quinone biosynthesis"
  )
  paste(titles, "- Homo sapiens (human)")
}
148
+
149
# Pathway titles of the transcription group (key "Transcript" in
# getKEGGdata). `Transcription` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_transc <- function(Transcription) {
  titles <- c(
    "RNA polymerase",
    "Basal transcription factors",
    "Spliceosome",
    "Transcription factors",
    "Transcription machinery"
  )
  paste(titles, "- Homo sapiens (human)")
}
159
+
160
+
161
+
162
# Pathway titles of the translation group (key "Transl" in getKEGGdata).
# `Translation` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_transl <- function(Translation) {
  titles <- c(
    "Ribosome",
    "Aminoacyl-tRNA biosynthesis",
    "RNA transport",
    "mRNA surveillance pathway",
    "Ribosome biogenesis in eukaryotes",
    "Ribosomal proteins",
    "Ribosome biogenesis",
    "Transfer RNA biogenesis",
    "Translation factors"
  )
  paste(titles, "- Homo sapiens (human)")
}
176
+
177
# Pathway titles of the folding, sorting and degradation group (key
# "Fold_degr" in getKEGGdata). `Folding_sorting_and_degradation` is a
# placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_fold <- function(Folding_sorting_and_degradation) {
  titles <- c(
    "Protein export",
    "Protein processing in endoplasmic reticulum",
    "SNARE interactions in vesicular transport",
    "Ubiquitin mediated proteolysis",
    "Sulfur relay system",
    "RNA degradation",
    "Chaperones and folding catalysts",
    "SNAREs",
    "Ubiquitin system",
    "Proteasome"
  )
  paste(titles, "- Homo sapiens (human)")
}
192
+
193
+
194
+
195
+
196
# Pathway titles of the replication and repair group (key "Repl_repair" in
# getKEGGdata). `Replication_and_repair` is a placeholder: the body never
# reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
# "DNA repair and recombination proteins" is a single title; it used to be
# split into "DNA repair and recombination" and "proteins", two entries that
# cannot match any KEGG name.
select_path_repl <- function(Replication_and_repair) {
  titles <- c(
    "DNA replication",
    "Base excision repair",
    "Nucleotide excision repair",
    "Mismatch repair",
    "Homologous recombination",
    "Non-homologous end-joining",
    "Fanconi anemia pathway",
    "DNA replication proteins",
    "Chromosome",
    "DNA repair and recombination proteins"
  )
  paste(titles, "- Homo sapiens (human)")
}
212
+
213
+
214
+
215
# Pathway titles of the signal-transduction group (key "sign_transd" in
# getKEGGdata). `Signal_transduction` is a placeholder: the body never reads
# it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_sign <- function(Signal_transduction) {
  titles <- c(
    "Ras signaling pathway",
    "Rap1 signaling pathway",
    "MAPK signaling pathway",
    "ErbB signaling pathway",
    "Wnt signaling pathway",
    "Notch signaling pathway",
    "Hedgehog signaling pathway",
    "TGF-beta signaling pathway",
    "Hippo signaling pathway",
    "VEGF signaling pathway",
    "Jak-STAT signaling pathway",
    "NF-kappa B signaling pathway",
    "TNF signaling pathway",
    "HIF-1 signaling pathway",
    "FoxO signaling pathway",
    "Calcium signaling pathway",
    "Phosphatidylinositol signaling system",
    "Phospholipase D signaling pathway",
    "Sphingolipid signaling pathway",
    "cAMP signaling pathway",
    "cGMP-PKG signaling pathway",
    "PI3K-Akt signaling pathway",
    "AMPK signaling pathway",
    "mTOR signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
244
+
245
+
246
# Pathway titles of the signaling molecules and interaction group (key
# "sign_mol_int" in getKEGGdata). `Signaling_molecules_and_interaction` is a
# placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_sign_mol <- function(Signaling_molecules_and_interaction) {
  titles <- c(
    "Neuroactive ligand-receptor interaction",
    "Cytokine-cytokine receptor interaction",
    "ECM-receptor interaction",
    "Cell adhesion molecules (CAMs)"
  )
  paste(titles, "- Homo sapiens (human)")
}
255
+
256
+
257
# Pathway titles of the transport and catabolism group (key "Transp_cat" in
# getKEGGdata). `Transport_and_catabolism` is a placeholder: the body never
# reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_transp_ca <- function(Transport_and_catabolism) {
  titles <- c(
    "Endocytosis",
    "Phagosome",
    "Lysosome",
    "Peroxisome",
    "Regulation of autophagy"
  )
  paste(titles, "- Homo sapiens (human)")
}
267
+
268
# Pathway titles of the cell growth and death group (key "cell_grow_d" in
# getKEGGdata). `Cell_growth_and_death` is a placeholder: the body never
# reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_cell_grow <- function(Cell_growth_and_death) {
  titles <- c(
    "Cell cycle",
    "Apoptosis",
    "p53 signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
276
+
277
+
278
# Pathway titles of the cellular community group (key "cell_comm" in
# getKEGGdata). `Cellular_community` is a placeholder: the body never reads
# it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
# The stem-cell title no longer ends with a space: the stray blank produced a
# double space before the species suffix, so the entry could never equal the
# single-spaced "<title> - Homo sapiens (human)" form used for every other
# title.
select_path_cell_comm <- function(Cellular_community) {
  titles <- c(
    "Focal adhesion",
    "Adherens junction",
    "Tight junction",
    "Gap junction",
    "Signaling pathways regulating pluripotency of stem cells"
  )
  paste(titles, "- Homo sapiens (human)")
}
288
+
289
+
290
# Pathway titles of the immune-system group (key "imm_syst" in getKEGGdata).
# `Immune_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_imm_syst <- function(Immune_system) {
  titles <- c(
    "Hematopoietic cell lineage",
    "Complement and coagulation cascades",
    "Platelet activation",
    "Toll-like receptor signaling pathway",
    "Toll and Imd signaling pathway",
    "NOD-like receptor signaling pathway",
    "RIG-I-like receptor signaling pathway",
    "Cytosolic DNA-sensing pathway",
    "Natural killer cell mediated cytotoxicity",
    "Antigen processing and presentation",
    "T cell receptor signaling pathway",
    "B cell receptor signaling pathway",
    "Fc epsilon RI signaling pathway",
    "Fc gamma R-mediated phagocytosis",
    "Leukocyte transendothelial migration",
    "Intestinal immune network for IgA production",
    "Chemokine signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
313
+
314
+
315
+
316
+
317
# Pathway titles of the endocrine-system group (key "end_syst" in
# getKEGGdata). `Endocrine_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_end_syst <- function(Endocrine_system) {
  titles <- c(
    "Insulin secretion",
    "Insulin signaling pathway",
    "Glucagon signaling pathway",
    "Regulation of lipolysis in adipocytes",
    "Adipocytokine signaling pathway",
    "PPAR signaling pathway",
    "GnRH signaling pathway",
    "Ovarian steroidogenesis",
    "Estrogen signaling pathway",
    "Progesterone-mediated oocyte maturation",
    "Prolactin signaling pathway",
    "Oxytocin signaling pathway",
    "Thyroid hormone synthesis",
    "Thyroid hormone signaling pathway",
    "Melanogenesis",
    "Renin secretion",
    "Renin-angiotensin system",
    "Aldosterone synthesis and secretion"
  )
  paste(titles, "- Homo sapiens (human)")
}
342
+
343
+
344
# Pathway titles of the circulatory-system group (key "circ_syst" in
# getKEGGdata). `Circulatory_system` is a placeholder: the body never reads
# it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_circ_syst <- function(Circulatory_system) {
  titles <- c(
    "Cardiac muscle contraction",
    "Adrenergic signaling in cardiomyocytes",
    "Vascular smooth muscle contraction"
  )
  paste(titles, "- Homo sapiens (human)")
}
352
+
353
+
354
# Pathway titles of the digestive-system group (key "dig_syst" in
# getKEGGdata). `Digestive_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_dig_syst <- function(Digestive_system) {
  titles <- c(
    "Salivary secretion",
    "Gastric acid secretion",
    "Pancreatic secretion",
    "Bile secretion",
    "Carbohydrate digestion and absorption",
    "Protein digestion and absorption",
    "Fat digestion and absorption",
    "Vitamin digestion and absorption",
    "Mineral absorption"
  )
  paste(titles, "- Homo sapiens (human)")
}
369
+
370
+
371
+
372
# Pathway titles of the excretory-system group (key "exc_syst" in
# getKEGGdata). `Excretory_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_exc_syst <- function(Excretory_system) {
  titles <- c(
    "Vasopressin-regulated water reabsorption",
    "Aldosterone-regulated sodium reabsorption",
    "Endocrine and other factor-regulated calcium reabsorption",
    "Proximal tubule bicarbonate reclamation",
    "Collecting duct acid secretion"
  )
  paste(titles, "- Homo sapiens (human)")
}
384
+
385
+
386
# Pathway titles of the nervous-system group (key "nerv_syst" in
# getKEGGdata). `Nervous_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_ner_syst <- function(Nervous_system) {
  titles <- c(
    "Glutamatergic synapse",
    "GABAergic synapse",
    "Cholinergic synapse",
    "Dopaminergic synapse",
    "Serotonergic synapse",
    "Long-term potentiation",
    "Long-term depression",
    "Retrograde endocannabinoid signaling",
    "Synaptic vesicle cycle",
    "Neurotrophin signaling pathway"
  )
  paste(titles, "- Homo sapiens (human)")
}
402
+
403
+
404
# Pathway titles of the sensory-system group (key "sens_syst" in
# getKEGGdata). `Sensory_system` is a placeholder: the body never reads it.
# Returns a character vector of titles suffixed with "- Homo sapiens (human)".
select_path_sens_syst <- function(Sensory_system) {
  titles <- c(
    "Phototransduction",
    "Olfactory transduction",
    "Taste transduction",
    "Inflammatory mediator regulation of TRP channels"
  )
  paste(titles, "- Homo sapiens (human)")
}
413
+
414
+
415
+
416
#' @title Select the class of TCGA data
#' @description keeps the columns of a gene expression matrix whose sample
#' code, read from characters 14-15 of the column name (ID barcode), matches
#' the requested label: code equal to 1 for "tumor", code of 10 or more for
#' "normal". Any other label returns the matrix unchanged.
#' @param Dataset gene expression matrix
#' @param typesample the labels of the samples (e.g. tumor,normal)
#' @export
#' @return a gene expression matrix of the samples with specified label
#' @examples
#' tumo<-SelectedSample(Dataset=Data_CANCER_normUQ_filt,typesample="tumor")[,2]
SelectedSample <- function(Dataset, typesample) {
  # Evaluated only inside a matching branch, as in a plain if/if layout.
  sample_code <- function() {
    as.numeric(substr(colnames(Dataset), 14, 15))
  }

  if (typesample == "tumor") {
    Dataset[, which(sample_code() == 01)]
  } else if (typesample == "normal") {
    Dataset[, which(sample_code() >= 10)]
  } else {
    Dataset
  }
}
436
+
437
+
438
#' @title Select the pairwise pathways whose AUC exceeds a cut-off
#' @description binds the AUC values in auc.df into one column, sorts them in
#' decreasing order and keeps the entries strictly greater than cutoff.
#' @param cutoff cut-off for AUC value
#' @param auc.df list of AUC value
#' @return a data frame with one row per retained entry (row names taken from
#' auc.df), ordered by decreasing AUC; the first column is the result of the
#' cut-off comparison and the second the AUC value. The data frame has zero
#' rows when no value exceeds the cut-off.
select_class <- function(auc.df, cutoff) {
  ds <- do.call("rbind", auc.df)
  tmp_ordered <- as.data.frame(ds[order(ds, decreasing = TRUE), ])
  colnames(tmp_ordered) <- 'pathway'
  er <- as.data.frame(tmp_ordered$pathway > cutoff)
  rownames(er) <- rownames(tmp_ordered)
  er[, 2] <- tmp_ordered$pathway
  # Rows are sorted, so the entries above the cut-off are the first n_keep.
  # seq_len() yields an empty selection when n_keep is 0, whereas 1:0 would
  # wrongly return the first row.
  n_keep <- sum(tmp_ordered$pathway > cutoff, na.rm = TRUE)
  er[seq_len(n_keep), , drop = FALSE]
}
454
+
455
+
456
+
457
+
458
#' @title Process matrix TCGA data after the selection of pairwise pathway
#' @description converts the score columns (third column onwards) of measure
#' to numeric, labels every row with the cleaned names of its two pathways
#' joined by "_", and keeps the rows whose label is a row name of list_perf.
#' @param measure matrix with measure of cross-talk among pathways
#' @param list_perf output of the function select_class
#' @return a gene expression matrix for case study 1
process_matrix <- function(measure, list_perf) {
  scoreMatrix <- as.data.frame(measure[, 3:ncol(measure)])
  scoreMatrix[] <- lapply(scoreMatrix, function(column) {
    as.numeric(as.character(column))
  })

  # Strip the species suffix, underscores and dashes from pathway names.
  clean_names <- function(pathway_names, species_pattern) {
    cleaned <- sub(species_pattern, '', gsub(" ", "_", pathway_names))
    cleaned <- gsub("(human)", "", cleaned, fixed="TRUE")
    gsub("-", "", gsub("_", "", cleaned))
  }
  first_pathway <- clean_names(measure[, 1], '_-_Homo_sapiens_*')
  second_pathway <- clean_names(measure[, 2], '_-_Homo_sapiens_(human)*')

  rownames(scoreMatrix) <- paste(as.matrix(first_pathway),
                                 as.matrix(second_pathway), sep="_")
  selected_pairs <- intersect(rownames(scoreMatrix), rownames(list_perf))
  scoreMatrix[selected_pairs, ]
}
484
+
485
+
486
+
487
# Converts the score columns (third column onwards) of measure_cell_process
# to numeric and labels every row with the cleaned names of its two pathways
# (columns 1 and 2) joined by "_". Returns the resulting data frame.
process_matrix_cell_process <- function(measure_cell_process) {
  score__cell_grow_d <- as.data.frame(
    measure_cell_process[, 3:ncol(measure_cell_process)]
  )
  score__cell_grow_d[] <- lapply(score__cell_grow_d, function(column) {
    as.numeric(as.character(column))
  })

  # Strip the species suffix, underscores and dashes from pathway names.
  clean_names <- function(pathway_names, species_pattern) {
    cleaned <- sub(species_pattern, '', gsub(" ", "_", pathway_names))
    cleaned <- gsub("(human)", "", cleaned, fixed="TRUE")
    gsub("-", "", gsub("_", "", cleaned))
  }
  first_pathway <- clean_names(measure_cell_process[, 1],
                               '_-_Homo_sapiens_*')
  second_pathway <- clean_names(measure_cell_process[, 2],
                                '_-_Homo_sapiens_(human)*')

  rownames(score__cell_grow_d) <- paste(as.matrix(first_pathway),
                                        as.matrix(second_pathway), sep="_")
  score__cell_grow_d
}
510
+
511
+
512
#' @title Select human KEGG pathways by title.
#' @description proc_path downloads the list of human ("hsa") KEGG pathways
#' with keggList and keeps the entries whose title is one of the titles in mer.
#' @param mer  character vector of pathway titles, for example the output of
#' select_path_carb
#' @export
#' @importFrom KEGGREST keggList
#' @return a list of two elements: the KEGG codes of the selected pathways
#' (with any "path:" prefix removed) and a one-column matrix holding their
#' titles, with the KEGG identifiers as row names (NULL when no title matched)
proc_path <- function(mer) {
  pathways.list <- keggList("pathway", "hsa") ## returns the list of human pathways
  # Vectorised membership test: no index loop to fail on an empty list and no
  # NULL gaps to filter out afterwards.
  selected <- pathways.list[pathways.list %in% mer]
  pathway.codes <- sub("path:", "", names(selected))
  b <- do.call("rbind", as.list(selected))
  list(pathway.codes, b)
}
0 535
new file mode 100644
... ...
@@ -0,0 +1,405 @@
1
#' @title Get human KEGG pathway data and network data in order to define the common genes.
#' @description list_path_net creates, for each pathway, the list of network
#'   genes that also belong to that pathway.
#' @param net_type  network data as provided by getNETdata; the columns
#'   \code{m_shar_pro} and \code{m2_shar_pro} are read
#' @param pathway  pathway data as provided by getKEGGdata (one column per
#'   pathway)
#' @export
#' @return a list indexed like the columns of \code{pathway}; an entry holds
#'   the network genes found in that pathway and is \code{NULL} when a pathway
#'   without shared genes precedes a later matching pathway
#' @examples
#' list_path<-list_path_net(net_type=netw,pathway=path)
list_path_net <- function(net_type, pathway) {
  # Factor columns are turned into character so genes are compared by label.
  # vapply() always returns a logical vector, even for an input without
  # columns, where sapply() would return a list and break the subsetting.
  factor_cols <- vapply(net_type, is.factor, logical(1))
  net_type[factor_cols] <- lapply(net_type[factor_cols], as.character)

  # Every distinct gene that appears on either side of an interaction.
  network_genes <- unique(c(net_type$m_shar_pro, net_type$m2_shar_pro))

  v <- list()
  for (k in seq_len(ncol(pathway))) {
    shared <- intersect(network_genes, pathway[, k])
    if (length(shared) > 0) {
      print(colnames(pathway)[k])
      # Stored at position k so the result stays aligned with the columns of
      # `pathway`.
      v[[k]] <- shared
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  v
}
31
+
32
+
33
#' @title Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only the pathways given in input.
#' @description GE_matrix creates a gene-by-pathway matrix holding, for every
#'   gene that belongs to a pathway, its mean expression across the samples of
#'   \code{DataMatrix}; all other cells are 0.
#' @param DataMatrix  gene expression matrix (eg.TCGA data), genes in rows
#' @param pathway  pathway data as provided by getKEGGdata (one column per
#'   pathway)
#' @export
#' @return a matrix with one row per gene and one column per pathway; rows
#'   whose sum is not greater than 0 are removed
#' @examples
#' list_path_plot<-GE_matrix(DataMatrix=tumo[,1:2],pathway=path)
GE_matrix <- function(DataMatrix, pathway) {
  # Remove the species suffix BEFORE touching the blanks: replacing the first
  # blank first breaks the suffix of single-word names ("Apoptosis - Homo
  # sapiens (human)") so that it is never stripped.
  stripped <- gsub(" - Homo sapiens (human)", "", colnames(pathway),
                   fixed = TRUE)
  colnames(pathway) <- sub(" ", "_", stripped)

  # Mean expression of every gene across the samples, named by gene.
  gene_means <- rowMeans(DataMatrix)
  gene_ids <- names(gene_means)

  PEAmatrix <- matrix(0, nrow(DataMatrix), ncol(pathway))
  rownames(PEAmatrix) <- rownames(DataMatrix)
  colnames(PEAmatrix) <- colnames(pathway)

  for (k in seq_len(ncol(pathway))) {
    shared <- intersect(gene_ids, pathway[, k])
    if (length(shared) > 0) {
      # Report the pathway being filled; the original printed the names of a
      # global object called `path` instead of the `pathway` argument.
      print(colnames(pathway)[k])
      PEAmatrix[shared, k] <- gene_means[shared]
    }
  }

  # drop = FALSE keeps the result a matrix when one row or column is left.
  PEAmatrix[which(rowSums(PEAmatrix) > 0), , drop = FALSE]
}
67
+
68
+
69
#' @title Prepare gene correlations and pathway groups for a cross-talk plot.
#' @description plotting_cross_talk computes the correlation between the rows
#'   of \code{path_matrix} and, for every pathway, the positions of its genes
#'   in that correlation matrix.
#' @param DataMatrix  gene expression matrix (eg.TCGA data); its row names are
#'   the genes looked up in the pathways
#' @param pathway  pathway data as provided by getKEGGdata
#' @param path_matrix  output of the function GE_matrix
#' @export
#' @return a list with \code{corr}, the correlation matrix of the rows of
#'   \code{path_matrix}, and \code{gruppi}, a list giving for each pathway the
#'   row indices of its genes in \code{corr}
#' @examples
#' mt<-plotting_cross_talk(DataMatrix=tumo[,1:2],pathway=path,path_matrix=list_path_plot)
plotting_cross_talk <- function(DataMatrix, pathway, path_matrix) {
  # Clean the pathway names once, before the loop: repeating sub() on every
  # iteration replaced one more blank each time, so names depended on the
  # iteration count. The suffix is removed first so single-word names are
  # handled too.
  stripped <- gsub(" - Homo sapiens (human)", "", colnames(pathway),
                   fixed = TRUE)
  colnames(pathway) <- sub(" ", "_", stripped)

  gene_ids <- rownames(DataMatrix)

  # Genes of DataMatrix found in each pathway, stored at the pathway's index.
  v <- list()
  for (k in seq_len(ncol(pathway))) {
    shared <- intersect(gene_ids, pathway[, k])
    if (length(shared) > 0) {
      # The original printed names from a global object `path`.
      print(colnames(pathway)[k])
      v[[k]] <- shared
      names(v)[k] <- colnames(pathway)[k]
    }
  }

  dc <- cor(t(path_matrix)) # gene-by-gene correlation across pathways

  # Translate every gene set into row positions of the correlation matrix.
  vv <- list()
  for (k in seq_along(v)) {
    present <- intersect(rownames(dc), v[[k]])
    vv[[k]] <- match(present, rownames(dc))
    names(vv)[k] <- colnames(pathway)[k]
  }

  # The pair is shaped for qgraph(corr, groups = gruppi, ...).
  list(corr = dc, gruppi = vv)
}
106
+
107
+
108
+
109
+
110
#' @title For TCGA data get human pathway data and creates a matrix with the average of genes for each pathway.
#' @description average creates a matrix with one summarized value (the mean
#'   expression of the pathway genes) for each pathway and sample
#' @param dataFilt TCGA matrix (genes in rows, samples in columns)
#' @param pathway pathway data (one column of gene names per pathway)
#' @export
#' @return a matrix with one row per pathway and one column per sample; a
#'   pathway without any gene in \code{dataFilt} yields \code{NaN}
#' @examples
#' score_mean<-average(dataFilt=tumo[,1:2],path)
average <- function(dataFilt, pathway) {
  PEAmatrix <- matrix(0, ncol(pathway), ncol(dataFilt))
  rownames(PEAmatrix) <- colnames(pathway)
  colnames(PEAmatrix) <- colnames(dataFilt)

  for (k in seq_len(nrow(PEAmatrix))) {
    pathway_genes <- intersect(rownames(dataFilt), pathway[, k])
    # drop = FALSE: with a single shared gene a matrix input would otherwise
    # collapse to a vector and colSums() would fail.
    pathway_expression <- dataFilt[pathway_genes, , drop = FALSE]
    PEAmatrix[k, ] <- colSums(pathway_expression) / length(pathway_genes)
  }

  PEAmatrix
}
141
+
142
+
143
+
144
+  
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
#' @title For TCGA data get human pathway data and creates a measure of cross-talk among pathways
#' @description euc_dist_crtlk creates a matrix with the euclidean distance
#'   between the average expression of every pair of pathways
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a character matrix with one row per pathway pair: the first two
#'   columns name the two pathways, the remaining columns (one per sample)
#'   hold the distance
#' @examples
#' score_euc_dista<-euc_dist_crtlk(dataFilt=tumo[,1:2],path)
euc_dist_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)

  # Every unordered pair of pathways, one pair per row.
  pathway_pairs <- t(combn(rownames(PEAmatrix), 2))

  # One row per pair, one column per sample.
  ma_d <- matrix(0, nrow(pathway_pairs), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)

  # seq_len() keeps the loop from running when there are no samples.
  for (p in seq_len(ncol(PEAmatrix))) {
    # dist() lists the pairs in the same order as combn() does.
    ma_d[, p] <- as.vector(dist(PEAmatrix[, p]))
  }

  # Label every row with its pathway pair; this coerces the distances to
  # character.
  cbind(pathway_pairs, ma_d)
}
177
+
178
+
179
+
180
+
181
#' @title For TCGA data get human pathway data and creates a measure of standard deviations among pathways
#' @description st_dv creates a matrix with the standard deviation of the
#'   expression of the pathway genes, for each pathway and sample
#' @param DataMatrix TCGA matrix (genes in rows, samples in columns)
#' @param pathway pathway data (one column of gene names per pathway)
#' @export
#' @return a matrix with one row per pathway and one column per sample; the
#'   value is \code{NA} when fewer than two pathway genes are present
#' @examples
#' stand_dev<-st_dv(DataMatrix=tumo[,1:2],pathway=path)
st_dv <- function(DataMatrix, pathway) {
  PEAmatrix_sd <- matrix(0, ncol(pathway), ncol(DataMatrix))
  rownames(PEAmatrix_sd) <- colnames(pathway)
  colnames(PEAmatrix_sd) <- colnames(DataMatrix)

  for (k in seq_len(nrow(PEAmatrix_sd))) {
    print(colnames(pathway)[k])
    pathway_genes <- intersect(rownames(DataMatrix), pathway[, k])
    # drop = FALSE: with a single shared gene a matrix input would otherwise
    # collapse to a vector and apply() would fail.
    pathway_expression <- DataMatrix[pathway_genes, , drop = FALSE]
    # Standard deviation across the pathway genes, per sample.
    PEAmatrix_sd[k, ] <- apply(pathway_expression, 2, sd)
  }

  PEAmatrix_sd
}
211
+
212
+
213
+
214
+
215
+
216
+
217
#' @title For TCGA data get human pathway data and creates a measure of discriminating score among pathways
#' @description ds_score_crtlk creates a matrix with the discriminating score
#'   of every pair of pathways: the distance between their average expression
#'   divided by the sum of their standard deviations
#' @param dataFilt TCGA matrix
#' @param pathway pathway data
#' @export
#' @return a character matrix with one row per pathway pair: the first two
#'   columns name the two pathways, the remaining columns (one per sample)
#'   hold the score
#' @examples
#' cross_talk_st_dv<-ds_score_crtlk(dataFilt=tumo[,1:2],pathway=path)
ds_score_crtlk <- function(dataFilt, pathway) {
  PEAmatrix <- average(dataFilt, pathway)
  PEAmatrix_sd <- st_dv(dataFilt, pathway)

  # Every unordered pair of pathways, one pair per row. Both matrices above
  # are labelled with the pathway names, so one pair table serves for both.
  pathway_pairs <- t(combn(rownames(PEAmatrix), 2))

  # Numerator: distance between the two pathway averages, per sample.
  ma_d <- matrix(0, nrow(pathway_pairs), ncol(PEAmatrix))
  colnames(ma_d) <- colnames(PEAmatrix)
  # seq_len() keeps the loop from running when there are no samples.
  for (p in seq_len(ncol(PEAmatrix))) {
    # dist() lists the pairs in the same order as combn() does.
    ma_d[, p] <- as.vector(dist(PEAmatrix[, p]))
  }

  # Denominator: sum of the two pathway standard deviations, per sample.
  ma <- unname(PEAmatrix_sd[pathway_pairs[, 1], , drop = FALSE]) +
    unname(PEAmatrix_sd[pathway_pairs[, 2], , drop = FALSE])
  colnames(ma) <- colnames(PEAmatrix_sd)

  score <- ma_d / ma # discriminating score |M1 - M2| / (S1 + S2)

  # Label every row with its pathway pair; this coerces the scores to
  # character.
  cbind(pathway_pairs, score)
}
252
+
253
+
254
+
255
#' @title SVM classification for each feature
#' @description svm_classification trains one SVM per pathway pair on a random
#'   training subset of the samples and returns the AUC obtained on the
#'   remaining samples
#' @param TCGA_matrix matrix of pathway-pair scores: the first two columns
#'   name the two pathways, the remaining columns hold one score per sample
#' @param nfs percentage of the samples of each class used as training set;
#'   the remaining samples form the test set
#' @param tumour barcode samples for a class (labelled 1)
#' @param normal barcode samples for another class (labelled 0)
#' @export
#' @importFrom e1071 tune svm
#' @importFrom ROCR prediction performance
#' @importFrom  grDevices rainbow
#' @return a list with AUC value for pairwise pathway
#' @examples
#' nf <- 60
#' res_class<-svm_classification(TCGA_matrix=score_euc_dist,nfs=nf,
#' normal=colnames(norm[,1:10]),tumour=colnames(tumo[,1:10]))
svm_classification <- function(TCGA_matrix, tumour, normal, nfs) {
  # Cleans a vector of pathway labels: blanks become underscores, the first
  # match of `species_regex` is removed, and then every literal "(human)",
  # underscore and hyphen is stripped.
  tidy_label <- function(raw, species_regex) {
    underscored <- gsub(" ", "_", raw)
    without_species <- sub(species_regex, "", underscored)
    without_tag <- gsub("(human)", "", without_species, fixed = TRUE)
    gsub("-", "", gsub("_", "", without_tag))
  }

  # --- Score table: one row per pathway pair, one numeric column per sample.
  scoreMatrix <- as.data.frame(TCGA_matrix[, 3:ncol(TCGA_matrix)])
  scoreMatrix[] <- lapply(
    scoreMatrix,
    function(column) as.numeric(as.character(column))
  )
  rownames(scoreMatrix) <- paste(
    tidy_label(TCGA_matrix[, 1], "_-_Homo_sapiens_*"),
    tidy_label(TCGA_matrix[, 2], "_-_Homo_sapiens_(human)*"),
    sep = "_"
  )

  # --- Samples in rows, pathway pairs in columns, class label first.
  tDataMatrix <- cbind(Target = 0, as.data.frame(t(scoreMatrix)))
  Dataset_g1 <- tDataMatrix[intersect(rownames(tDataMatrix), normal), ]
  Dataset_g3 <- tDataMatrix[intersect(rownames(tDataMatrix), tumour), ]
  Dataset_g1$Target <- 0
  Dataset_g3$Target <- 1

  # --- Random split: nfs percent of each class goes to the training set.
  # The normal class is sampled first, as before, so results obtained after
  # set.seed() do not change.
  tab_g1_training <- sample(rownames(Dataset_g1),
                            round(nrow(Dataset_g1) / 100 * nfs))
  tab_g3_training <- sample(rownames(Dataset_g3),
                            round(nrow(Dataset_g3) / 100 * nfs))
  tab_g1_testing <- setdiff(rownames(Dataset_g1), tab_g1_training)
  tab_g3_testing <- setdiff(rownames(Dataset_g3), tab_g3_training)

  # intersect() puts the sampled names back into the order of the data rows.
  training <- rbind(
    Dataset_g1[intersect(rownames(Dataset_g1), tab_g1_training), ],
    Dataset_g3[intersect(rownames(Dataset_g3), tab_g3_training), ]
  )
  testing <- rbind(
    Dataset_g1[tab_g1_testing, ],
    Dataset_g3[tab_g3_testing, ]
  )

  is_feature <- colnames(training) != "Target"
  x <- training[, is_feature, drop = FALSE]
  y <- training$Target
  z <- testing[, is_feature, drop = FALSE]
  zi <- testing$Target

  feature_names <- colnames(z)
  curve_colours <- rainbow(length(feature_names))

  auc.df <- list()
  for (j in seq_along(feature_names)) {
    k <- j + 1L # column of feature j in `training` (column 1 is Target)
    print(colnames(training)[k])

    # NOTE(review): this grid search uses all features and does not depend on
    # k, so it repeats the same (randomised) tuning for every feature. It is
    # left inside the loop because hoisting it would change the random
    # results - confirm whether per-feature tuning was intended.
    svm_tune <- tune(svm, train.x = x, train.y = y,
                     kernel = "radial",
                     ranges = list(cost = 10^(-1:2), gamma = c(.5, 1, 2)),
                     cross = 10)

    # NOTE(review): Target is numeric 0/1, so svm() presumably fits a
    # regression rather than a classification model - confirm against the
    # e1071 documentation.
    svm_model_after_tune <- svm(Target ~ ., data = training[, c(1, k)],
                                kernel = "radial",
                                cost = svm_tune$best.parameters$cost,
                                gamma = svm_tune$best.parameters$gamma,
                                cross = 10, probability = TRUE)

    # Test data restricted to the current feature, under the same column name.
    z3 <- as.data.frame(z[, j])
    colnames(z3) <- feature_names[j]
    pred <- predict(svm_model_after_tune, z3, decision.values = TRUE)

    svm.roc <- prediction(attributes(pred)$decision.values, zi)
    svm.auc <- performance(svm.roc, "tpr", "fpr")
    auc.df[[j]] <- performance(svm.roc, "auc")@y.values[[1]]

    # Curves 2 to 5 are overlaid on the current plot together with a legend;
    # every other curve starts a new plot.
    if (j > 1 && j < 6) {
      plot(svm.auc, col = curve_colours[j], add = TRUE)
      legend("bottomright", feature_names,
             lty = 1, col = curve_colours, bty = "n", cex = .90, pch = 20,
             ncol = 1)
    } else {
      plot(svm.auc, col = curve_colours[j])
    }
  }

  names(auc.df) <- feature_names
  auc.df
}
405
+
0 406
new file mode 100644
... ...
@@ -0,0 +1,8 @@
1
+# StarBioTrek
2
+
3
+### Installation ###
4
+```R
5
+source("https://bioconductor.org/biocLite.R")
6
+biocLite("StarBioTrek")
7
+```
8
+
0 9
new file mode 100644
1 10
Binary files /dev/null and b/data/Data_CANCER_normUQ_filt.rda differ
2 11
new file mode 100644
3 12
Binary files /dev/null and b/data/list_path_plot.rda differ
4 13
new file mode 100644
5 14
Binary files /dev/null and b/data/netw.rda differ
6 15
new file mode 100644
7 16
Binary files /dev/null and b/data/norm.rda differ
8 17
new file mode 100644
9 18
Binary files /dev/null and b/data/path.rda differ
10 19
new file mode 100644
11 20
Binary files /dev/null and b/data/score_euc_dist.rda differ
12 21
new file mode 100644
13 22
Binary files /dev/null and b/data/tumo.rda differ
14 23
new file mode 100644
... ...
@@ -0,0 +1,32 @@
1
# Citation metadata for the StarBioTrek package.
citHeader("To cite StarBioTrek in publications use:")

# Entry for the package itself.
citEntry(
  entry = "article",
  title = "StarBioTrek \n    miRNA data",
  author = personList(
    as.person("Claudia Cava"),
    as.person("Isabella Castiglioni")
  ),
  journal = "manuscript in preparation",
  year = "2016",
  textVersion = paste(
    "Claudia Cava, Isabella Castiglioni (2016).",
    "StarBioTrek"
  )
)

# Entry for the related review article.
citEntry(
  entry = "article",
  title = "Integrating genetics and epigenetics in\nbreast cancer: biological insights, experimental, computational methods and\ntherapeutic potential.",
  author = personList(
    as.person("Claudia Cava"),
    as.person("Gloria Bertoli"),
    as.person("Isabella Castiglioni")
  ),
  journal = "BMC Syst Biol",
  year = "2015",
  volume = 9,
  number = 62,
  textVersion = paste("Cava C, Bertoli G, Castiglioni I. Integrating genetics and epigenetics in\nbreast cancer: biological insights, experimental, computational methods and\ntherapeutic potential. BMC Syst Biol.2015;9:62")
)
0 33
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/StarBioTrek.r
3
+\docType{data}
4
+\name{Data_CANCER_normUQ_filt}
5
+\alias{Data_CANCER_normUQ_filt}
6
+\title{TCGA data}
7
+\format{A data frame with rows and variables}
8
+\description{
9
+TCGA data
10
+}
11
+\keyword{internal}
12
+
0 13
new file mode 100644
... ...
@@ -0,0 +1,23 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/path_star.R
3
+\name{GE_matrix}
4
+\alias{GE_matrix}
5
+\title{Get human KEGG pathway data and a gene expression matrix in order to obtain a matrix with the gene expression for only pathways given in input .}
6
+\usage{
7
+GE_matrix(DataMatrix, pathway)