Browse code

Adding SpidermiR, DRIMSeq, debrowser, GMRP, ISoLDE, CountClust, OncoScore, AneuFinder, recoup, genphen, DEFormats, sscu

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GMRP@115200 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 23/03/2016 22:36:02
Showing 30 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+Package: GMRP
2
+Type: Package
3
+Title: GWAS-based Mendelian Randomization and Path Analyses
4
+Version: 0.99.2
5
+Date: 2015-11-04
6
+Author: Yuan-De Tan and Dajiang Liu
7
+Maintainer: Yuan-De Tan <tanyuande@gmail.com>
8
+Description: Perform Mendelian randomization analysis of multiple SNPs
9
+        to determine risk factors causing disease of study and to
10
+        exclude confounding variabels and perform path analysis to
11
+        construct path of risk factors to the disease.
12
+License: GPL (>= 2)
13
+Depends: R(>= 3.3.0),stats,utils,graphics, grDevices, diagram, plotrix,
14
+        base,GenomicRanges
15
+Suggests: BiocStyle, BiocGenerics, VariantAnnotation
16
+LazyLoad: yes
17
+biocViews: Sequencing, Regression, SNP
18
+NeedsCompilation: no
0 19
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+exportPattern("^[[:alpha:]]+")
2
+export(fmerg)
3
+export(chrp)
4
+export(mktable)
5
+export(path)
6
+export(pathdiagram)
7
+export(pathdiagram2)
8
+export(snpposit)
9
+export(ucscannot)
10
+import(graphics)
11
+import(utils)
12
+import(diagram)
13
+import(plotrix)
14
+#import(data.table)
15
+import(grDevices)
16
+import(stats)
17
+import(GenomicRanges)
0 18
new file mode 100644
... ...
@@ -0,0 +1,14 @@
1
+chrp <-
2
+function(hg){
3
+hg<-as.character(hg)	
4
+HG<-unlist(strsplit(hg,split=":"))
5
+n<-length(hg)
6
+i<-seq(n)
7
+k1<-i*2-1
8
+k2<-i*2
9
+chr<-HG[k1]
10
+posit<-as.numeric(HG[k2])
11
+CHR<-unlist(strsplit(chr,split="chr"))
12
+chrn<-as.numeric(CHR[k2])
13
+return(cbind(chrn,posit))
14
+}
0 15
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+fmerg <-
2
+function(fl1,fl2,ID1,ID2,A="",B="",method="No"){
3
+
4
+try (if(is.null(dim(fl1))|is.null(dim(fl2))) 
5
+   stop("No data to be merged")
6
+   )
7
+fl1<-cbind(fl1[,ID1],as.data.frame(fl1))
8
+colnames(fl1)[1]<-"SNPID"
9
+fl2<-cbind(fl2[,ID2],as.data.frame(fl2))
10
+colnames(fl2)[1]<-"SNPID"
11
+method<-tolower(method)
12
+if(method=="no"|method=="n"){
13
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),sort=TRUE)
14
+#merge.dat<-subset(merge.dat,SNPID!=".")
15
+}
16
+else if(method=="file1"){
17
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),all.x=TRUE,sort=TRUE)
18
+}
19
+else if(method=="file2"){
20
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),all.y=TRUE,sort=TRUE)
21
+}
22
+else if(method=="all"|method=="a"){
23
+colnames(fl1)[1]<-"SNPID1"
24
+colnames(fl2)[2]<-"SNPID2"
25
+rownames(fl1)<-fl1$SNPID1
26
+rownames(fl2)<-fl2$SNPID2
27
+pos.all <- unique(c(fl1$SNPID1,fl2$SNPID2))
28
+ind.fl1 <- match(pos.all,fl1$SNPID1)
29
+ind.fl2 <- match(pos.all,fl2$SNPID2)
30
+fl1.new <- fl1[ind.fl1,]
31
+fl2.new<-fl2[ind.fl1,]
32
+colnames(fl1.new) <- paste(A,colnames(fl1),sep='.')
33
+colnames(fl2.new) <- paste(B,colnames(fl2),sep='.')
34
+merge.dat<-cbind(as.data.frame(fl1.new),as.data.frame(fl2.new))
35
+mergdata<-merge.dat[,-"SNPID2"]
36
+}
37
+mergdata<-merge.dat[,-1]
38
+return(mergdata)
39
+}
0 40
new file mode 100644
... ...
@@ -0,0 +1,132 @@
1
+mktable <-
2
+function(cdata,ddata,rt="beta",varname,LG=1,Pv=0.00000005,Pc=0.979,Pd=0.979){
3
+
4
+try(
5
+ if(is.null(cdata))
6
+  stop("No course data")
7
+  )
8
+
9
+try(
10
+ if(is.null(ddata))
11
+  stop("No disease data")
12
+  )
13
+
14
+cdata<-as.data.frame(cdata)
15
+ddtat<-as.data.frame(ddata)
16
+
17
+pvj<-cdata$pvj
18
+pcj<-cdata$pcj
19
+
20
+#print(length(posit))
21
+newcdat<-subset(cdata,(pvj<=Pv&pcj>=Pc))
22
+rsid<-newcdat$rsid
23
+SNP.d<-ddata$SNP.d
24
+
25
+idx<-is.element(SNP.d,rsid)
26
+SNP<-SNP.d[idx]
27
+ND<-ddata$N.case[idx]+ddata$N.ctr[idx]
28
+pdj<-ND/max(ND)
29
+newcdat<-as.data.frame(newcdat)
30
+
31
+ddata<-cbind(ddata[idx,],pdj)
32
+newcd<-fmerg(fl1=newcdat,fl2=ddata,ID1="rsid",ID2="SNP.d",A="",B="",method="no")
33
+sbnewdat<-subset(newcd,pdj>=Pd)
34
+sbnewdat<-as.data.frame(sbnewdat)
35
+
36
+if(LG>2){
37
+
38
+sbnewdat1<-sbnewdat[with(sbnewdat,order(chrn,posit)),]
39
+
40
+chrn<-sbnewdat1$chrn
41
+chrn.vect<-unique(chrn)
42
+#ch<-chrn.vect[1]
43
+posit<-sbnewdat1$posit
44
+
45
+#distance<-posit[max(ix)]-posit[min(ix)]
46
+distance<-rep(NA,length(chrn.vect))
47
+posit.new<-rep(NA,length(posit))
48
+for(ch in chrn.vect){
49
+ #  print(ch)
50
+    ix<-which(chrn==ch)
51
+  #  print(ix)
52
+    distance[ch]<-round((posit[max(ix)]-posit[min(ix)])/LG,0)
53
+    if(is.na(distance[ch])==FALSE){
54
+    if(distance[ch]<=1){
55
+     posit.new[min(ix)]<-posit[min(ix)]
56
+       }else{
57
+
58
+      for(i in min(ix):(max(ix)-2)){
59
+      if((posit[(i+1)]-posit[i]>LG)&(posit[(i+2)]-posit[(i+1)])>LG){
60
+       posit.new[i]<-posit[i]
61
+       posit.new[(i+1)]<-posit[(i+1)]
62
+          }
63
+        }
64
+       }
65
+    }
66
+    }
67
+# check if posit.new is in posit, T for yes and F for no
68
+ indx<-is.element(posit,posit.new)
69
+ sbnewdat2<-sbnewdat1
70
+ sbnewdat3<-sbnewdat2[indx,]
71
+ }else{
72
+ sbnewdat3<-sbnewdat
73
+
74
+ }
75
+
76
+ dim(sbnewdat3)
77
+sbnewdat3<-data.frame(sbnewdat3)
78
+chr<-sbnewdat3$chrn
79
+nSNP<-sbnewdat3$rsid
80
+nposit<-sbnewdat3$posit
81
+
82
+freqcase<-sbnewdat3$freq.case
83
+ #print(is.numeric(freqcase))
84
+ ln<-length(varname)
85
+ rn<-nrow(sbnewdat3)
86
+ betay<-alle<-freq<-sd<-matrix(NA,nrow=rn,ncol=ln)
87
+ betay[,1]<-sbnewdat3$beta.d # disease beta value vector of SNPs
88
+ alle[,1]<-sbnewdat3$a1.d
89
+ freq[,1]<-sbnewdat3$freq.d
90
+ sd[,1]<-sqrt(freqcase*(1-freqcase))
91
+ for(i in 2:ln){
92
+ k<-(2+2*(ln-1)+i)
93
+ betay[,i]<-sbnewdat3[,k]
94
+     k1<-(2+(ln-1)+i)
95
+ freq[,i]<-sbnewdat3[,k1]
96
+     k2<-2+i
97
+ alle[,i]<-sbnewdat3[,k2]
98
+ k3<-(2+3*(ln-1)+i)
99
+ sd[,i]<-sbnewdat3[,k3]
100
+ }
101
+ #print(head(freq))
102
+ rn<-nrow(betay)
103
+ for(i in 2:ln){
104
+ for(j in 1:rn){
105
+ if(identical(alle[j,1],alle[j,i])==FALSE){
106
+     betay[j,i]<--betay[j,i]
107
+     }
108
+   }
109
+ }
110
+
111
+colnames(betay)<-varname
112
+
113
+betax<--matrix(NA,nrow=rn,ncol=ln)
114
+idx<-seq_len(ln)
115
+betax[,idx]<-betay[,idx]*sqrt((freq[,idx]*(1-freq[,idx]))/sd[,idx])
116
+
117
+ betay<-cbind(nposit,betay)
118
+ betay<-cbind(chr,nSNP,as.data.frame(betay))
119
+
120
+ colnames(betay)[1:3]<-c("chr","SNP","posit")
121
+ colnames(betax)<-varname
122
+ betax<-cbind(chr,nSNP,nposit,betax)
123
+ colnames(betax)[1:3]<-c("chr","SNP","posit")
124
+
125
+rt<-tolower(rt)
126
+if (rt=="beta"|rt=="b"){
127
+ return(betay)
128
+ }else if (rt=="path"|rt=="p"){
129
+   return(betax)
130
+ }
131
+
132
+ }
0 133
new file mode 100644
... ...
@@ -0,0 +1,86 @@
1
+path <-
2
+function(betav,model,outcome){
3
+betav<-as.data.frame(betav)
4
+rn<-nrow(betav) # number of beta values in each beta variable
5
+cn<-ncol(betav)
6
+#betav<-DataFrame(betav)
7
+fit<-summary(lm(model,data=betav))
8
+
9
+print(fit)
10
+corr<-cor(betav)
11
+cvar<-cov(betav)
12
+
13
+beta.xx<-rep(0,(cn-1))
14
+
15
+for(i in 2:cn){
16
+beta.xx[i-1]<-fit$coef[i,1]
17
+}
18
+
19
+varx<-rep(0,(cn-1))
20
+vary<-cvar[1,1]
21
+corrx<-corr[-1,-1]
22
+cvarx<-cvar[-1,-1]
23
+
24
+for(i in 1:(cn-1)){
25
+varx[i]<-cvarx[i,i]
26
+}
27
+
28
+py<-rep(0,(cn-1)) # direction path coefficients
29
+
30
+for(i in 1:(cn-1)){
31
+py[i]<-beta.xx[i]*(varx[i]/vary)^0.5
32
+#f2[i]<-(varx[i]/vary)^0.5
33
+}
34
+print(py)
35
+#print(vratio)
36
+path<-matrix(0,(cn-1),(cn-1))
37
+for(i in 1:(cn-1)){
38
+	for(j in 1:(cn-1)){
39
+path[i,j]<-corrx[i,j]*py[j]
40
+ }
41
+}
42
+pe<-fit$sigma/sqrt(vary)
43
+
44
+colnames(path)<-colnames(corrx)
45
+rownames(path)<-rownames(corrx)
46
+
47
+corr.outcome<-apply(path,1,sum)
48
+path<-cbind(path,corr.outcome)
49
+pn<-ncol(path)
50
+beta<-fit$coef
51
+se<-beta[,2]
52
+#betacol<-matrix(NA,nrow=nrow(beta),ncol=1)
53
+betar<-round(beta,6)
54
+betar[,ncol(beta)]<-beta[,ncol(beta)] #p-values without round treatment
55
+#print(betar)
56
+corrname<-rownames(corr)
57
+colnames(path)[pn]<-outcome
58
+
59
+pathname<-colnames(path)
60
+
61
+corr<-round(corr,6)
62
+path<-round(path,6)
63
+#R2<-rep(NA,cn)
64
+#R2[1]<-fit$r.square
65
+R_square<-rep(NA,ncol(path))
66
+Pe<-rep(NA,ncol(path))
67
+Pe[1]<-pe
68
+#path<-rbind(path,R2)
69
+R_square[1]<-round(fit$r.square,6)
70
+Path_value<-rep(NA,length(py)+1)
71
+Path_value[1:length(py)]<-round(py,6)
72
+t_value<-p_value<-rep(NA,(1+length(py)))
73
+for(i in 1:length(py)){
74
+t_value[i]<-py[i]/se[i]
75
+}
76
+for(i in 1:length(py)){
77
+p_value[i]<-2*pt(-abs(t_value[i]),df=rn-cn)
78
+}
79
+
80
+betax<-matrix(NA,nrow=nrow(betar),ncol=(cn-ncol(beta)))
81
+betar<-cbind(betar,betax)
82
+
83
+result<-rbind(betar,corrname,corr,pathname,path,Path_value,Pe,R_square)
84
+return(noquote(result))
85
+
86
+}
0 87
new file mode 100644
... ...
@@ -0,0 +1,144 @@
1
+pathdiagram <-
2
+function(pathdata,disease,R2,range){
3
+
4
+#library(diagram)
5
+
6
+pathcad<-pathdata
7
+
8
+#disease<-"CAD"
9
+#range<-c(2:5)
10
+pathname<-toupper(colnames(pathcad)[range])
11
+
12
+rownames(pathcad)<-pathname
13
+
14
+corr.xx<-pathcad[,range]
15
+path_value<-pathcad$path
16
+residual<-sqrt(1-R2)
17
+
18
+par(mar = c(3, 1, 1, 1))
19
+openplotmat()
20
+shdv<-0.005
21
+colorset<-c(colors()[645],colors()[642],colors()[503],colors()[373],colors()[92],colors()[134],colors()[139],colors()[39],colors()[20],colors()[44],colors()[71],colors()[123],colors()[128],colors()[132])
22
+cl<-length(colorset)
23
+
24
+mycolor<-rep(NA,cl)
25
+mycolor<-colorset[1:cl]
26
+mypath<-cbind(seq(1:length(path_value)),path_value)
27
+mypath<-mypath[order(-path_value),]
28
+indx<-mypath[,1]
29
+mcolor<-rep(NA,(1+length(path_value)))
30
+
31
+pl<-length(path_value)
32
+for(i in 1:pl){
33
+ if(mypath[i,2]<0){
34
+ k=cl+1-i
35
+ }else if(mypath[i,2]>0&mypath[i,2]<0.15){
36
+ k=cl-5-i
37
+ }else{
38
+k=i
39
+ }
40
+mcolor[(indx[i]+1)]<-mycolor[k]
41
+}
42
+mcolor[1]<-colors()[82]
43
+
44
+# Get plot coordinates
45
+elpos <- coordinates(c(2, length(path_value)))
46
+# adjust coordinates for Residual
47
+elpos[2,1] <- abs((elpos[1,1]+elpos[1,2])/2)
48
+
49
+# Specify Arrow positions
50
+#1 Residual to Dependent
51
+ft1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2))
52
+#2 Independent to dependent
53
+ft2 <- matrix(ncol=2, byrow = FALSE,
54
+data= c(seq((2+length(path_value)))[3:(length(path_value)+2)], rep(2, length(path_value))))
55
+
56
+#3 For cor.x
57
+fromto_CU <- t(combn(seq((2+length(path_value)))[3:(length(path_value)+2)],2))
58
+#4 For path distances
59
+fromto_ST <- rbind(ft1,ft2)
60
+
61
+# Plot Path distance arrows
62
+nr <- nrow(fromto_ST)
63
+
64
+arrpos <- matrix(ncol = 2, nrow = nr)
65
+
66
+for (i in 1:nr){
67
+k=i+6
68
+  arrpos[i, ] <-straightarrow (to = elpos[fromto_ST[i, 2], ],
69
+                                from = elpos[fromto_ST[i, 1], ],
70
+                     lcol=mcolor[i], lwd = 3, arr.pos = 0.7, arr.length = 0.5)
71
+ }
72
+
73
+#Label residual path distance arrow
74
+text(arrpos[1, 1]-0.05, arrpos[1, 2] + 0.03,
75
+     paste("Pe", " = ", round(residual, 3), sep=""), cex=1)
76
+
77
+#Label path distance arrows
78
+nr <- nrow(arrpos)
79
+for(i in 2:nr){
80
+  text(arrpos[i, 1]+(i*0.7-2)*0.14, arrpos[i, 2]-0.18,paste("P","(",pathname[i-1],")"," = ", round(path_value[i-1], 3), sep=""),cex=1,col=mcolor[i])
81
+
82
+}
83
+
84
+# Plot correlation arrows direction 1
85
+nr <- nrow(fromto_CU)
86
+arrpos <- matrix(ncol = 2, nrow = nr)
87
+for (i in 1:nr){
88
+#k=i+6
89
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 2], ],
90
+                                from = elpos[fromto_CU[i, 1], ],
91
+                                lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = 0.3)
92
+}
93
+
94
+# Plot correlation arrows - direction 2
95
+nr <- nrow(fromto_CU)
96
+arrpos <- matrix(ncol = 2, nrow = nr)
97
+for (i in 1:nr){
98
+#k=i+6
99
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ],
100
+                              from = elpos[fromto_CU[i, 2], ],
101
+                              lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = -0.3)
102
+}
103
+# Create combinations of cor.x for labelling rxy in correlation arrows
104
+rcomb <- as.data.frame(t(combn(seq(nrow(corr.xx)),2)))
105
+rcomb <- paste(rcomb$V1,rcomb$V2, sep="")
106
+
107
+# Label correlation arrows
108
+nr <- nrow(fromto_CU)
109
+arrpos <- matrix(ncol = 2, nrow = nr)
110
+for (i in 1:nr)
111
+#   k=i+6
112
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ],
113
+                              from = elpos[fromto_CU[i, 2], ],
114
+                              lwd = 2, arr.pos = 0.5, lcol = "transparent", arr.length = 0.5, curve = -0.3)
115
+
116
+nr <- nrow(arrpos)
117
+for(i in 1:nr){
118
+  text(arrpos[i, 1], arrpos[i, 2] + 0.03,
119
+       paste("r", rcomb[i]," = ", round(as.dist(corr.xx)[i], 3), sep=""), cex=1)
120
+}
121
+
122
+# Label Residual
123
+#textrect (elpos[1,], 0.09, 0.03,lab = "Residual", box.col =colors()[(75+7)],
124
+#          shadow.col = "grey", shadow.size = 0.01, cex = 1)
125
+
126
+textround(elpos[1,], 0.02, 0.04,lab = "Residual", box.col =colors()[82],
127
+          shadow.col = "grey", shadow.size = shdv, cex = 1)
128
+
129
+# Label Dependent
130
+#textrect (elpos[2,], 0.09, 0.03,lab = "AUDPC", box.col = "red",
131
+#          shadow.col = "grey", shadow.size = 0.005, cex = 1)
132
+
133
+textround (elpos[2,], 0.02, 0.04,lab = disease, box.col = "red",
134
+          shadow.col = "grey", shadow.size = shdv, cex = 1)
135
+
136
+# Label independents
137
+nr <- nrow(elpos)
138
+for (i in 3:nr){
139
+	k<-i+5
140
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1)
141
+
142
+ textround (elpos[i,], 0.02, 0.04,lab = pathname[i-2], box.col =mcolor[i-1],shadow.col = "grey", shadow.size = shdv, cex = 1)
143
+}
144
+}
0 145
new file mode 100644
... ...
@@ -0,0 +1,214 @@
1
+pathdiagram2 <-
2
+function(pathD,pathO,rangeD,rangeO,disease,R2D,R2O){
3
+#require(agricolae)
4
+#require(Hmisc)
5
+#library(diagram)
6
+
7
+pathcad<-pathD
8
+pathtc<-pathO
9
+#rangeD<-c(2:5)
10
+#rangeO<-c(2:4)
11
+#R2D<-0.241792
12
+#R2O<-0.965284
13
+cadpathname<-toupper(colnames(pathcad)[rangeD])
14
+
15
+rownames(pathcad)<-cadpathname
16
+rownametc<-toupper(colnames(pathtc)[rangeO])
17
+for(i in 1:length(rownametc))
18
+if(identical(cadpathname[i],rownametc[i])==FALSE){
19
+	stop("no match between child and parent causal variables.")
20
+}
21
+if(R2D==""|R2O==""){
22
+stop("R2D and R2O are required")
23
+}
24
+pathtc<-data.frame(pathtc)
25
+pathcad<-data.frame(pathD)
26
+#rownames(pathtc)<-rownametc
27
+corr.tc<-pathtc[,rangeO]
28
+colnames(corr.tc)<-rownametc
29
+rownames(corr.tc)<-rownametc
30
+path.tc<-pathtc$path
31
+path.cad<-pathcad$path
32
+residual.tc<-sqrt(1-R2O)
33
+residual.cad<-sqrt(1-R2D)
34
+
35
+par(mar = c(1, 1, 1, 1))
36
+openplotmat()
37
+# Get plot coordinates
38
+elpos <- coordinates(c(2, length(path.tc)))*0.7
39
+
40
+# adjust coordinates for Residual
41
+elpos[2,1] <- abs((elpos[1,1]+elpos[1,2])/2)
42
+
43
+# Specify Arrow positions
44
+#1 Residual to Dependent
45
+ft1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2))
46
+#2 Independent to dependent
47
+ft2 <- matrix(ncol=2, byrow = FALSE,
48
+data= c(seq((2+length(path.tc)))[3:(length(path.tc)+2)], rep(2, length(path.tc))))
49
+#3 For cor.x
50
+fromto_CU <- t(combn(seq((2+length(path.tc)))[3:(length(path.tc)+2)],2))
51
+#4 For path distances
52
+fromto_ST <- rbind(ft1,ft2)
53
+
54
+# Plot Path distance arrows
55
+nr <- nrow(fromto_ST)
56
+
57
+arrpos <- matrix(ncol = 2, nrow = nr)
58
+
59
+for (i in 1:nr){
60
+k=i+6
61
+  arrpos[i, ] <-straightarrow (to = elpos[fromto_ST[i, 2], ],
62
+                                from = elpos[fromto_ST[i, 1], ],
63
+                     lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.6, arr.length = 0.5)
64
+ }
65
+
66
+#Label residual path distance arrow
67
+text(arrpos[1, 1]-0.05, arrpos[1, 2] + 0.02,
68
+     paste("Pe", " = ", round(residual.tc, 3), sep=""), cex=0.8)
69
+
70
+#Label path distance arrows
71
+nr <- nrow(arrpos)
72
+for(i in 2:nr){
73
+ k=i+6
74
+text(arrpos[i, 1]+(i-2)*0.06, arrpos[i, 2]-0.05,paste("P","(",tolower(rownametc[i-1]),")"," = ", round(path.tc[i-1], 3), sep=""),cex=0.8,col=colors()[(75+k)])
75
+
76
+}
77
+
78
+# Plot correlation arrows direction 1
79
+nr <- nrow(fromto_CU)
80
+arrpos <- matrix(ncol = 2, nrow = nr)
81
+for (i in 1:nr){
82
+#k=i+6
83
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 2], ],
84
+                                from = elpos[fromto_CU[i, 1], ],
85
+                                lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = 0.35)
86
+}
87
+
88
+# Plot correlation arrows - direction 2
89
+for (i in 1:nr){
90
+#k=i+6
91
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ],
92
+                                from = elpos[fromto_CU[i, 2], ],
93
+                                lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = -0.35)
94
+}
95
+
96
+# Create combinations of cor.x for labelling rxy in correlation arrows
97
+rcomb <- as.data.frame(t(combn(seq(nrow(corr.tc)),2)))
98
+rcomb <- paste(rcomb$V1,rcomb$V2, sep="")
99
+
100
+# Label correlation arrows
101
+nr <- nrow(fromto_CU)
102
+arrpos <- matrix(ncol = 2, nrow = nr)
103
+for (i in 1:nr)
104
+#   k=i+6
105
+  arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ],
106
+                              from = elpos[fromto_CU[i, 2], ],
107
+                              lwd = 2, arr.pos = 0.5, lcol = "transparent", arr.length = 0.5, curve = -0.35)
108
+
109
+nr <- nrow(arrpos)
110
+for(i in 1:nr){
111
+  text(arrpos[i, 1], arrpos[i, 2] + 0.03,
112
+       paste("r", rcomb[i]," = ", round(as.dist(corr.tc)[i], 2), sep=""), cex=1)
113
+}
114
+
115
+# Label Residual
116
+#textrect (elpos[1,], 0.09, 0.03,lab = "Residual", box.col =,
117
+#          shadow.col = "grey", shadow.size = 0.005, cex = 1)
118
+
119
+textround(c(elpos[1,1]-0.08,elpos[1,2]), 0.02, 0.04,lab = "Residual", box.col =colors()[(75+7)],
120
+          shadow.col = "grey", shadow.size = 0.005, cex = 1)
121
+
122
+# Label Dependent
123
+#textrect (elpos[2,], 0.09, 0.03,lab = "AUDPC", box.col = "red",
124
+#          shadow.col = "grey", shadow.size = 0.005, cex = 1)
125
+
126
+# the second outcome variable boxround
127
+textround (elpos[2,], 0.02, 0.04,lab = cadpathname[length(cadpathname)], box.col = colors()[122],
128
+          shadow.col = "grey", shadow.size = 0.005, cex = 1.3)
129
+
130
+# Label independents
131
+nr <- nrow(elpos)
132
+for (i in 3:nr){
133
+	k<-i+5
134
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1)
135
+
136
+ textround (elpos[i,], 0.02, 0.04,lab = colnames(corr.tc)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", shadow.size = 0.005, cex = 1)
137
+}
138
+
139
+elpos1 <- coordinates(c(2, length(path.cad)))*1.15
140
+
141
+# adjust coordinates for Residual
142
+elpos1[2,1] <- abs((elpos1[1,1]+elpos1[1,2])*0.75)
143
+
144
+# Specify Arrow positions
145
+#1 Residual to Dependent
146
+fit1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2))
147
+#2 Independent to dependent
148
+fit2 <- matrix(ncol=2, byrow = FALSE,
149
+data= c(seq((2+length(path.cad)))[3:(length(path.cad)+2)], rep(2, length(path.cad))))
150
+#3 For cor.x
151
+fromto_CU1 <- t(combn(seq((2+length(path.cad)))[3:(length(path.cad)+2)],2))
152
+#4 For path distances
153
+fromto_ST1 <- rbind(fit1,fit2)
154
+
155
+# Plot Path distance arrows
156
+nr1 <- nrow(fromto_ST1)-1
157
+
158
+arrpos1 <- matrix(ncol = 2, nrow = nr1)
159
+
160
+arrpos1[1, ] <-straightarrow (to = c(elpos1[fromto_ST1[1, 2], 1]-0.5,elpos1[fromto_ST1[1, 2], 2]),
161
+                                from = c(elpos1[fromto_ST1[1, 1], 1]-0.1,elpos1[fromto_ST1[1, 1], 2]),
162
+                     lcol=colors()[(75+7)], lwd = 2, arr.pos = 0.5, arr.length = 0.5)
163
+
164
+ # path line of the second outcome (TC) to the fist outcome variable (disease)
165
+arrpos1[2, ] <-straightarrow (to = c(elpos1[fromto_ST1[1, 2], 1]-0.5,elpos1[fromto_ST1[1, 2], 2]),
166
+                                from = c(elpos[fromto_ST[1, 2], 1],elpos[fromto_ST[1, 2], 2]+0.03),
167
+                     lcol=colors()[122], lwd = 2, arr.pos = 0.6, arr.length = 0.7)
168
+
169
+for (i in 2:nr1){
170
+if(i==3){
171
+k=i+6
172
+  arrpos1[i, ] <-curvedarrow(to = c(elpos1[fromto_ST1[i, 2], 1]-0.5,elpos1[fromto_ST1[i, 2], 2]),
173
+                                from = elpos[fromto_ST[i, 1], ],
174
+                     lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.7, arr.length = 0.5, curve = 0.05)
175
+}else{
176
+k=i+6
177
+  arrpos1[i, ] <-straightarrow (to = c(elpos1[fromto_ST1[i, 2], 1]-0.5,elpos1[fromto_ST1[i, 2], 2]),
178
+                                from = elpos[fromto_ST[i, 1], ],
179
+                     lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.7, arr.length = 0.5)
180
+ }
181
+}
182
+#Label residual path distance arrow
183
+
184
+text(arrpos1[1, 1]-0.04, arrpos1[1, 2] + 0.03,
185
+     paste("Pe", " = ", round(residual.cad, 3), sep=""), cex=0.8)
186
+
187
+# residual coefficient to the firest outcome(disease)
188
+textround(c(elpos1[1,1]-0.18,elpos1[1,2]), 0.02, 0.04,lab = "Residual", box.col =colors()[(75+7)],
189
+          shadow.col = "grey", shadow.size = 0.005, cex = 1)
190
+
191
+textround (c(elpos1[2,1]-0.5,elpos1[2,2]), 0.02, 0.04,lab = disease, box.col = "red",
192
+          shadow.col = "grey", shadow.size = 0.005, cex = 1)
193
+
194
+ # path coefficient of second outcome variable (such as TC)to first outcome variable(disease)
195
+ text(arrpos1[2, 1]+0.1, arrpos1[2, 2]+0.03,
196
+     paste("P","(", cadpathname[length(cadpathname)],")", " = ", round(path.cad[length(path.cad)], 3), sep=""), cex=0.8,col=colors()[122])
197
+
198
+nr1 <- nrow(arrpos1)
199
+for(i in 2:nr1){
200
+ k=i+6
201
+text(arrpos1[i, 1]+(i*0.8-2)*0.09, arrpos1[i, 2]-(i-1)*0.05,paste("P","(", cadpathname[i-1],")"," = ", round(path.cad[i-1], 3), sep=""),cex=1,col=colors()[(75+k)])
202
+
203
+}
204
+
205
+# recover the color and labs
206
+nr <- nrow(elpos)
207
+for (i in 3:nr){
208
+	k<-i+5
209
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1)
210
+
211
+ textround (elpos[i,], 0.02, 0.04,lab = colnames(corr.tc)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", shadow.size = 0.005, cex = 1.3)
212
+}
213
+
214
+}
0 215
new file mode 100644
... ...
@@ -0,0 +1,108 @@
1
+snpposit <-
2
+function(SNPdata,SNP_hg19="chr",X="no",LG= 10,main="A",maxd=2000){
3
+SNPdata<-as.data.frame(SNPdata)	
4
+if(SNP_hg19!="chr"){
5
+hg19<-as.character(SNPdata$SNP_hg19)
6
+#print(hg19[1:10])
7
+
8
+chromp<-chrp(hg19)
9
+#print(chromp[1:6,])
10
+}else{
11
+chr<-SNPdata$chr
12
+if(length(chr)==0){
13
+stop("no data")
14
+}
15
+
16
+SNPdata<-SNPdata[with(SNPdata,order(chr,posit)),]
17
+chrn<-as.numeric(SNPdata$chr)
18
+posit<-as.numeric(SNPdata$posit)
19
+chromp<-cbind(chrn,posit)
20
+
21
+#print(CHR)
22
+}
23
+chr<-chromp[,1]
24
+CHR<-unique(chr)
25
+nch<-length(CHR) # number of chromosomes
26
+meanl<-m<-md<-rep(NA,nch)
27
+
28
+#calculate SNP number on each chromosome
29
+nsnpchr<-function(x,LG){
30
+y<-as.numeric(x[,2])
31
+#print(y)
32
+nr<-length(y)
33
+y<-sort(y)
34
+m<-0
35
+ix<-seq(nr-1)	
36
+iy<-seq(from=2,to=nr)
37
+m<-length(which(y[iy]-y[ix]>=LG))
38
+return(m)
39
+}
40
+
41
+#averaged distance between SNPs on each chromosome
42
+mdist<-function(x,LG){
43
+x<-as.data.frame(x)
44
+y<-x[,2]
45
+#print(y)
46
+nr<-length(y)
47
+ y<-sort(y)
48
+ mdist<-(y[nr]-y[1])/((nr-1)*LG)
49
+return(mdist)
50
+
51
+}
52
+
53
+#calculate average distance between SNPs and SNP number on each chromosome
54
+m<-md<-snpn<-rep(0,nch)
55
+for(i in 1:nch){
56
+chrmp<-subset(chromp,chrn==i)
57
+snpn[i]<-nrow(chrmp)
58
+if(snpn[i]!=0){
59
+m[i]<-nsnpchr(subset(chromp,chrn==i),LG)
60
+md[i]<-mdist(subset(chromp,chrn==i),LG)
61
+  }
62
+}
63
+
64
+md[which(md>=maxd)]<-maxd
65
+md[which(is.na(md))]<-0
66
+#print(length(md))
67
+#print(length(CHR))
68
+colors<-c("red","lightsalmon","lightseagreen","mediumspringgreen","mediumorchid4","mediumpurple4","lightskyblue4","mediumseagreen","deeppink",
69
+"mediumvioletred","firebrick","magenta","navyblue","lightsteelblue2","navajowhite","blue","maroon4","orange2","purple","darkorchid1",
70
+"black","cyan")
71
+
72
+chrcol<-rep(NA,nch)
73
+for(i in 1:nch){
74
+ chrcol[i]<-colors[i]
75
+}
76
+if(X=="yes"){
77
+CHR[nch]<-"X"	
78
+}
79
+#print(CHR)
80
+par(oma=c(2,1,2,1))
81
+bp<-barplot(md,width=2,ylab="mean length (kbp) of interval between SNPs", names.arg =paste("chr",CHR,sep=""),col=chrcol,ylim=c(0,(maxd+250)),
82
+main=main,cex.lab=1.5,cex.names=1,cex.main=2.5,cex.axis=1.5)
83
+
84
+# space between lab and column
85
+YY<-rep(NA,nch)
86
+for(i in 1:nch){
87
+	if(!is.na(md[i])){
88
+if(md[i]>=1000){
89
+YY[i]<-md[i]*1.05
90
+   }else if(md[i]<1000&md[i]>=500){
91
+   YY[i]<-md[i]*1.08
92
+   }else if(md[i]<500&md[i]>=200){
93
+   YY[i]<-md[i]*1.2
94
+   }else if(md[i]<200&md[i]>=100){
95
+    YY[i]<-(md[i]*1.8)
96
+    }else if(md[i]<100&md[i]>=50){
97
+    YY[i]<-(md[i]*2.5)
98
+    }else if(md[i]<50&md[i]>0){
99
+    YY[i]<-(md[i]*5)
100
+    }else if(md[i]==0){
101
+    YY[i]<-45
102
+  }
103
+  }
104
+}
105
+#text(x=bp,y=YY,labels=n,col="black")
106
+text(x=bp,y=YY,labels=snpn,col="black",cex=1.3)
107
+return(m)
108
+}
0 109
new file mode 100644
... ...
@@ -0,0 +1,57 @@
1
+ucscannot <-
2
+function(UCSCannot,SNPn,A=3,B=1.9,C=1.3, method=1){
3
+UCSCannot<-as.data.frame(UCSCannot)	
4
+function_unit<-UCSCannot$function_unit
5
+if(is.null(function_unit)){
6
+stop("No function_unit found in the data")
7
+}
8
+genes<-UCSCannot$Symbol
9
+if(is.null(genes)){
10
+stop("No Symbol found in the data")
11
+}
12
+ N<-length(genes)
13
+ gene.n<-length(unique(genes))
14
+
15
+ intron<-subset(UCSCannot,function_unit=="intronic"|function_unit=="non-coding intronic")
16
+ code<-subset(UCSCannot,function_unit=="coding")
17
+ UTR3<-subset(UCSCannot,function_unit=="3utr")
18
+ UTR5<-subset(UCSCannot,function_unit=="5utr")
19
+ ncoding<-subset(UCSCannot,function_unit=="non-coding")
20
+ upstream5<-subset(UCSCannot,function_unit=="5upstream")
21
+ downstream3<-subset(UCSCannot,function_unit=="3downstream")
22
+
23
+ intron<-length(intron[,1])/N
24
+ coding<-length(code[,1])/N
25
+ UTR3<-length(UTR3[,1])/N
26
+ UTR5<-length(UTR5[,1])/N
27
+ intergene<-length(ncoding[,1])/N
28
+ upstream<-length(upstream5[,1])/N
29
+ downstream<-length(downstream3[,1])/N
30
+
31
+ res<-matrix(NA,1,8)
32
+ res[1,]<-c(gene.n,coding,intron,UTR3,UTR5,intergene,upstream,downstream)
33
+ colnames(res)<-c("genes","exons","introns","TUR3","UTR5","intergenes","upstream","downstream")
34
+ res2<-c(coding,intron,intergene,UTR5,upstream,UTR3,downstream)
35
+ oldcexmain<-par(cex.main=A)
36
+ cols=c("gold2","red","magenta","blue","cornflowerblue","limegreen","mediumseagreen")
37
+ main=paste("Distribution of",SNPn,"SNPs within",gene.n,"genes")
38
+ 
39
+ if(method==1){
40
+ annot<-c("exon","intron","intergene","5'UTR","5'upstream","3'UTR","3'downstream")
41
+ lbls <- paste(annot, "\n", round(res2*100,1),"%", sep="")
42
+# oldcexmain<-par(cex.main=A)
43
+ pie3D(res2,labelcex=B,labels=lbls,labelcol=par("fg"), explode=0.1,labelrad=C,main=main)
44
+ }else if(method==2){
45
+  annot<-c("intron","exon","3'downstream","3'UTR","5'upstream","5'UTR","intergene")
46
+  lbls <- paste(round(res2*100,1),"%", sep="")		
47
+ # cols=c("gold2","red","magenta","blue","cornflowerblue","limegreen","mediumseagreen")
48
+ pie3D(res2,labelcex=(B+0.5),labels=lbls,labelcol=par("fg"), explode=0.08,labelrad=C,main=main)
49
+ if (B>=1.8){
50
+ D<-1.8
51
+ }else{
52
+ D<-B}
53
+ legend(0.5,1.05,annot,col=cols,pch=19,cex=D)
54
+ }
55
+ par(oldcexmain)
56
+ return(res)
57
+ }
0 58
new file mode 100644
1 59
Binary files /dev/null and b/data/SNP358.data.rda differ
2 60
new file mode 100644
3 61
Binary files /dev/null and b/data/SNP368annot.data.rda differ
4 62
new file mode 100644
5 63
Binary files /dev/null and b/data/beta.data.rda differ
6 64
new file mode 100644
7 65
Binary files /dev/null and b/data/cad.data.rda differ
8 66
new file mode 100644
9 67
Binary files /dev/null and b/data/lpd.data.rda differ
10 68
new file mode 100644
... ...
@@ -0,0 +1,84 @@
1
+\name{GMRP-package}
2
+\alias{GMRP-package}
3
+\alias{GMRP}
4
+\docType{package}
5
+\title{
6
+\bold{GWAS}-based Mendelian Randomization Path Analysis
7
+}
8
+\description{
9
+\bold{GMRP} is used to perform Mendelian randomization analysis of causal variables on disease of study using SNP beta data from \bold{GWAS} or \bold{GWAS} meta analysis and furthermore execute path analysis of causal variables onto the disease.
10
+}
11
+\details{
12
+\bold{GMRP} can perform analyses of Mendelian randomization (MR),correlation, path of causal variables onto disease of interest and SNP annotation summarization analysis. MR includes SNP selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta values, \bold{GMRP} performs correlation and path analyses to construct  path diagrams of causal variables to the disease. \bold{GMRP} consists of 8 functions: \emph{chrp, fmerg, mktable,  pathdiagram2, pathdiagram, path, snpposit, ucscannot} and 5 datasets: \code{beta.data, cad.data, lpd.data, SNP358.data and SNP368_annot.data}. Function \emph{chrp} is used to separate string vector hg19 into two numeric vectors: chromosome number and SNP position on chromosomes. Function \emph{fmerg} is used to merge two datasets into one dataset. Function \emph{mktable} performs SNP selection and creates a standard beta table for function path to do path analyses. Function \emph{pathdiagram} is used to create a path diagram of causal variables onto the disease or onto outcome. Function \emph{pathdiagram2} can merg two-level pathdiagrams into one nested pathdiagram where inner path diagram is a path diagram of causal variables contributing onto outcome and the outside path diagram is a diagram of path of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \code{lpd.data} and \code{cad.data} provide an example to create a standard beta dataset for \emph{path} function to do path analysis and SNP data for SNP annotation analysis by performing \emph{mktable} and \emph{fmerg}. beta.data are a standard beta dataset for path analysis. \code{SNP358.data} provide an example for function \emph{snpposit} to do SNP position annotation analysis and \code{SNP368_annot.data} are for function \emph{ucscannot} to do SNP function annotation analysis. It is specially emphasized that except for that making standard beta table using \emph{mktable} must be done in Unix/Linux system,GMRP can be performed in Windows or Mac OS. This is because GWAS datasets usually are very huge but standard beta table is small. If users'Unix/Linux system has X11 or the other graphics system, then user should perform GMRP in Unix/Linux system, otherwise, user should transfer a standard beta table to a local computer and run GMRP in it.      
13
+
14
+}
15
+\author{
16
+Yuan-De Tan
17
+\email{tanyuande@gmail.com}
18
+
19
+\\Dajiang Liu
20
+
21
+\\Maintainer: Yuan-De Tan
22
+}
23
+\references{
24
+Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. Nat Genet 45: 1345-1352.
25
+
26
+\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177.
27
+ 
28
+\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. Methods Mol Biol 713: 153-166.\\Wright, S. 1921. Correlation and causation. J. Agricultural Research 20: 557-585.
29
+
30
+\\Wright, S. 1934. The method of path coefficients Annals of Mathematical Statistics 5 (3): 161-215 5: 161-215.
31
+
32
+}
33
+\keyword{ package }
34
+\seealso{
35
+  \code{\link{path}}, \code{\link{mktable}}, \code{\link{pathdiagram}},\code{\link{pathdiagram2}}, \code{\link[diagram]{plotmat}}, \code{\link[diagram]{plotweb}}
36
+  }
37
+
38
+\examples{
39
+
40
+data(beta.data)
41
+mybeta<-DataFrame(beta.data)
42
+CAD<-beta.data$cad
43
+LDL<-beta.data$ldl
44
+HDL<-beta.data$hdl
45
+TG<-beta.data$tg
46
+TC<-beta.data$tc
47
+#par(mfrow=c(2,2))
48
+plot(LDL,CAD,pch=19,col="blue",xlab="beta of SNPs on LDL",ylab="beta of SNP on CAD", 
49
+ main="A",cex.lab=1.5,cex.axis=1.5,cex.main=2)
50
+abline(lm(CAD~LDL),col="red",lwd=2)
51
+plot(HDL,CAD,pch=19,col="darkgreen",xlab="beta of SNPs on HDL",ylab="beta of SNP on 
52
+ CAD", main="B",cex.lab=1.5,cex.axis=1.5,cex.main=2)
53
+abline(lm(CAD~HDL),col="red",lwd=2)
54
+plot(TG,CAD,pch=19,col=colors()[96],xlab="beta of SNPs on TG",ylab="beta of SNP on
55
+ CAD",main="C",cex.lab=1.5,cex.axis=1.5,cex.main=2)
56
+abline(lm(CAD~TG),col="red",lwd=2)
57
+plot(TC,CAD,pch=19,col=colors()[123],xlab="beta of SNPs on TC",ylab="beta of SNP on
58
+ CAD",main="D",cex.lab=1.5,cex.axis=1.5,cex.main=2)
59
+abline(lm(CAD~TC),col="red",lwd=2)
60
+
61
+mod<-cad~ldl+hdl+tg+tc
62
+pathvalue<-path(betav=mybeta,model=mod,outcome="cad")
63
+
64
+mypath<-matrix(NA,3,4)
65
+mypath[1,]<-c(1.000000,-0.066678, 0.420036,0.764638)
66
+mypath[2,]<-c(-0.066678,1.000000,-0.559718,0.496831)
67
+mypath[3,]<-c(0.420036,-0.559718,1.000000,0.414346)
68
+colnames(mypath)<-c("ldl","hdl","tg","path")
69
+mypath<-DataFrame(mypath)
70
+#mypath
71
+#DataFrame with 3 rows and 4 columns
72
+#        ldl       hdl        tg      path
73
+#  <numeric> <numeric> <numeric> <numeric>
74
+#1  1.000000 -0.066678  0.420036  0.764638
75
+#2 -0.066678  1.000000 -0.559718  0.496831
76
+#3  0.420036 -0.559718  1.000000  0.414346
77
+
78
+#> pathdiagram(pathdata=mypath,disease="cad",R2=0.988243,range=c(1:3))
79
+#Loading required package: shape
80
+#Error in pathcad$path : $ operator is invalid for atomic vectors
81
+mypath<-as.data.frame(mypath)
82
+pathdiagram(pathdata=mypath,disease="cad",R2=0.988243,range=c(1:3))
83
+
84
+}
0 85
new file mode 100644
... ...
@@ -0,0 +1,30 @@
1
+\name{SNP358.data}
2
+\alias{SNP358.data}
3
+\docType{data}
4
+\title{
5
+Data of 358 SNPs
6
+}
7
+\description{
8
+\code{SNP358.data} were obtained from \bold{GWAS} Meta-analyzed datasets of lipoprotein cholesterols and coronary artery disease. The data contain three numeric vectors (columns): SNPID(\verb{rsid}), chromosome number (\verb{chr}) and SNP position on chromosome (posit).
9
+}
10
+\usage{data("SNP358.data")}
11
+\format{
12
+  A data frame with 358 observations on the following 3 variables.
13
+  \describe{
14
+    \item{\code{rsid}}{a character vector}
15
+    \item{\code{chr}}{a numeric vector}
16
+    \item{\code{posit}}{a numeric vector}
17
+  }
18
+}
19
+\details{
20
+These 358 SNPs were chosen by using mktable with Pv = code{5x10e-08},\code{Pc = Pd =0.979} from code{lpd.data and cad.data}. They provide a data example for how to perform annotation analysis of SNP positions.
21
+}
22
+\value{
23
+A set of data with 358 rows(SNPs) and 3 columns(SNP ID, chromosome # and SNP position on chromosomes).
24
+}
25
+
26
+\examples{
27
+data(SNP358.data)
28
+## maybe str(SNP358.data) ; plot(SNP358.data) ...
29
+}
30
+\keyword{datasets}
0 31
new file mode 100644
... ...
@@ -0,0 +1,38 @@
1
+\name{SNP368annot.data}
2
+\alias{SNP368annot.data}
3
+\docType{data}
4
+\title{
5
+Annotation data of \verb{368SNPs}
6
+}
7
+\description{
8
+The annotation data of 368SNPs are used to construct SNP distribution in gene elements (coding region, introne,UTR, etc). The data contain 12 vectors or variables but only Symbol and function_unit are used by ucscannot.R to build SNP distribution in gene elements.
9
+}
10
+\usage{data("SNP368annot.data")}
11
+\format{
12
+  A data frame with 1053 observations on the following 6 variables.
13
+  \describe{
14
+    \item{\code{SNP}}{a string vector}
15
+    \item{\code{Allele}}{a string vector} 
16
+    \item{\code{Strand}}{a numeric vector}
17
+    \item{\code{Symbol}}{a string vector}
18
+    \item{\code{Gene}}{a string vector}
19
+    \item{\code{function_unit}}{a string vector}
20
+  }
21
+}
22
+\details{
23
+\code{SNP368annot.data} were obtained by performing mktable with \code{PV=5x10e-08},\code{Pc=Pd=0.979} on lpd.data and cad.data and SNP tools. \code{SNP368annot.data} provides an practical example for constructing distribution of SNPs in gene elements. Note that \verb{function_units} are gene elements.
24
+}
25
+\value{
26
+A dataset with 1053 rows and 6 columns for results of SNP annotation analysis. See format above.	
27
+}
28
+\source{
29
+http://csg.sph.umich.edu//abecasis/public/lipids2013/
30
+
31
+}
32
+\references{
33
+http://snp-nexus.org/test/snpnexus_19427/
34
+}
35
+\examples{
36
+data(SNP368annot.data)
37
+}
38
+\keyword{datasets}
0 39
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+\name{beta.data}
2
+\alias{beta.data}
3
+\docType{data}
4
+\title{
5
+Beta Data Of SNP Regressed on Causal Variables and Disease 
6
+}
7
+\description{
8
+Beta data are a matrix dataset consisting of 5 columns: cad, ldl, hdl, tg, and tc with 368 rows.
9
+}
10
+\usage{data("beta.data")}
11
+\format{
12
+  A data frame with 368 observations on the following 5 variables.
13
+  \describe{
14
+    \item{\code{cad}}{a numeric vector}
15
+    \item{\code{ldl}}{a numeric vector}
16
+    \item{\code{hdl}}{a numeric vector}
17
+    \item{\code{tg}}{a numeric vector}
18
+    \item{\code{tc}}{a numeric vector}
19
+  }
20
+}
21
+\details{
22
+Beta data are a matrix consisting of regression coefficients of 368 SNPs on cad, ldl, hdl, tg, tc where \verb{cad} is coronary artery disease, \verb{ldl} is low-density lipoprotein cholesterol, \verb{hdl}, high-density lipoprotein cholesterol, \verb{tg},triglycerides and \verb{tc}, total cholesterol in plasma. These 368 SNPs were obtained by using \verb{mktable} from \bold{GWAS} meta-analyzed data.
23
+}
24
+\value{
25
+A set of real regression coefficients of 368 SNPs on disease and causal variables.	
26
+}
27
+\source{
28
+http://csg.sph.umich.edu//abecasis/public/lipids2013/
29
+
30
+\\http://www.cardiogramplusc4d.org/downloads/
31
+
32
+}
33
+\references{
34
+Willer CJ et al. Discovery and refinement of loci associated with lipid levels. Nat. Genet. 2013. doi:10.1038/ng.2797.
35
+
36
+\\Schunkert, H. et al. 2011. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. Nat Genet 43: 333-338.[online]
37
+
38
+\\Schunkert H, Konig IR, Kathiresan S, Reilly MP, Assimes TL, Holm H et al. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. Nat Genet. 2011 43: 333-338.
39
+}
40
+\examples{
41
+data(beta.data)
42
+## maybe str(beta.data) ; plot(beta.data) ...
43
+}
44
+\keyword{datasets}
0 45
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+\name{cad.data}
2
+\alias{cad.data}
3
+\docType{data}
4
+\title{
5
+bold{GWAS} Meta-analyzed Data of Coronary Artery Disease
6
+}
7
+\description{
8
+\code{cad.data} are a matrix dataset consisting of 12 variables such as \verb{SNPID}, \verb{SNP} position on chromosomes, allele and alternative allele, allelic frequencies and 1069 \verb{SNP}s.
9
+}
10
+\usage{data("cad.data")}
11
+\format{
12
+  A data frame with 1609 observations on the following 12 variables.
13
+  \describe{
14
+    \item{\code{SNP}}{a character vector}
15
+    \item{\code{chr_pos_b36}}{a character vector}
16
+    \item{\code{reference_allele}}{a character vector}
17
+    \item{\code{other_allele}}{a character vector}
18
+    \item{\code{ref_allele_frequency}}{a numeric vector}
19
+    \item{\code{pvalue}}{a numeric vector}
20
+    \item{\code{het_pvalue}}{a numeric vector}
21
+    \item{\code{log_odds}}{a numeric vector}
22
+    \item{\code{log_odds_se}}{a numeric vector}
23
+    \item{\code{N_case}}{a numeric vector}
24
+    \item{\code{N_control}}{a numeric vector}
25
+    \item{\code{model}}{a character vector}
26
+  }
27
+}
28
+\details{
29
+\code{cad.data}, also called \verb{CARDIoGRAM} \bold{GWAS}, are a meta-analyzed GWAS data from \bold{GWAS} studies of European descent imputed to \bold{HapMap2} involving 22,233 cases and 64,762 controls. The data were downloaded from the following website.
30
+}
31
+\value{
32
+A data sheet consisting of 1609 rows(SNPs) and 12 columns(character vectors such as \verb{SNPID}  and allele, numeric vector such as allele frequency and beta coefficient. See data format above). 	
33
+}
34
+\source{
35
+http://www.cardiogramplusc4d.org/downloads/
36
+}
37
+\references{
38
+Schunkert H, Konig IR, Kathiresan S, Reilly MP, Assimes TL, Holm H et al. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. \bold{Nat Genet.} 2011 \bold{43}: 333-338.
39
+}
40
+\examples{
41
+data(cad.data)
42
+## maybe str(cad.data) ; plot(cad.data) ...
43
+}
44
+\keyword{datasets}
0 45
new file mode 100644
... ...
@@ -0,0 +1,47 @@
1
+\name{chrp}
2
+\alias{chrp}
3
+\title{
4
+Split hg19
5
+}
6
+\description{
7
+Split string hg19 into two numeric columns: chr and posit.
8
+}
9
+\usage{
10
+chrp(hg)
11
+}
12
+
13
+\arguments{
14
+  \item{hg}{
15
+character vector represented with code{chr##.#######} where chr## is chromosome number and \code{.#######} is SNP site (bp).
16
+}
17
+}
18
+\details{
19
+chrp can convert \code{chr##.#######} into two numeric columns: chr(chromosome number) and posit(SNP position)
20
+}
21
+\value{
22
+Return two numeric vectors: chrosomome number and SNP position
23
+}
24
+\note{
25
+If there is chrX.######### in the data sheet, then user should change chrX.######## to chr23.#######.	
26
+}
27
+\author{
28
+Yuan-De Tan
29
+\email{tanyuande@gmail.com}
30
+
31
+\\Dajiang Liu
32
+}
33
+\note{
34
+hg may also be hg18. User can also use packages GenomicRanges to retrieve chromosome # and SNP position.
35
+}
36
+
37
+\seealso{
38
+ \code{\link{mktable}},\code{link{GenomicRanges}[Granges]},\code{link{GenomicRanges}[IRangs]},\code{link{GenomicRanges}[DataFrame]}
39
+}
40
+\examples{
41
+data(lpd.data)
42
+lpd<-lpd.data
43
+hg19<-lpd$SNP_hg19.HDL
44
+chr<-chrp(hg=hg19)
45
+}
46
+\keyword{chromosome}
47
+\keyword{SNP}
0 48
new file mode 100644
... ...
@@ -0,0 +1,83 @@
1
+\name{fmerg}
2
+\alias{fmerg}
3
+\title{
4
+Merge Two \bold{GWAS} Result Data Sheets
5
+}
6
+\description{
7
+\emph{fmerg} can be used to merge two \bold{GWAS} result data sheets with the same key \verb{ID(SNP ID)} into one data sheet.
8
+}
9
+\usage{
10
+fmerg(fl1, fl2,ID1, ID2, A, B, method)
11
+}
12
+\arguments{
13
+  \item{fl1}{
14
+R object: data file 1
15
+}
16
+  \item{fl2}{
17
+R object: data file 2
18
+}
19
+  \item{ID1}{
20
+key id (SNP ID such as rsid) in file 1
21
+}
22
+  \item{ID2}{
23
+key id (SNP ID such as rsid) in file 2
24
+}
25
+  \item{A}{
26
+postfix for file 1: A=".W1". W1 may be any identifier in file 1. Default is A="".
27
+}
28
+  \item{B}{
29
+postfix for file 2: B=".W2". W2 may be any identifier in file2. Default is B="".
30
+}
31
+  \item{method}{
32
+method for merging. See details.
33
+}
34
+}
35
+\details{
36
+fl1 and fl2 are two \bold{GWAS} result data files from different studies or with different risk variables. They contain \verb{SNPID}, \verb{hg18}, \verb{hg19}(positions), beta values, allele, frequency, and so on. The method has four options: method="No","NO" or "no" means that all data with unmatch \verb{SNP}s are not saved in the merged file; method="All","ALL" or "all" lets fmerg save all the data with unmatched \verb{SNP}s from two files but they are not paired one-by-one. This is different from R \emph{merge} function. method="file1" will save the data with unmatched \verb{SNP}s only from file 1 in the merged file and method="file2" allows function \emph{fmerg} to save the data with unmatched \verb{SNP}s from file2 in the merged file.
37
+
38
+}
39
+\value{
40
+Return a joined data sheet.
41
+}
42
+
43
+\author{
44
+Yuan-De Tan
45
+\email{tanyuande@gmail.com}
46
+
47
+\\Dajiang Liu
48
+}
49
+\note{
50
+Function fmerg can also be applied to the other types of data.
51
+}
52
+
53
+\seealso{
54
+ \code{\link[base]{merge}}
55
+}
56
+\examples{
57
+data1<-matrix(NA,20,4)
58
+data2<-matrix(NA,30,7)
59
+SNPID1<-paste("rs",seq(1:20),sep="")
60
+SNPID2<-paste("rs",seq(1:30),sep="")
61
+data1[,1:4]<-c(round(runif(20),4),round(runif(20),4),round(runif(20),4),round(runif(20),4))
62
+data2[,1:4]<-c(round(runif(30),4),round(runif(30),4),round(runif(30),4),round(runif(30),4))
63
+data2[,5:7]<-c(round(seq(30)*runif(30),4),round(seq(30)*runif(30),4),seq(30))
64
+data1<-cbind(SNPID1,as.data.frame(data1))
65
+data2<-cbind(SNPID2,as.data.frame(data2))
66
+dim(data1)
67
+dim(data2)
68
+colnames(data1)<-c("SNP","var1","var2","var3","var4")
69
+colnames(data2)<-c("SNP","var1","var2","var3","var4","V1","V2","V3")
70
+data12<-fmerg(fl1=data1,fl2=data2,ID1="SNP",ID2="SNP",A=".dat1",B=".dat2",method="No")
71
+#data12[1:3,]
72
+#  SNP.dat1 var1.dat1 var2.dat1 var3.dat1 var4.dat1 SNP.dat2 var1.dat2 var2.dat2
73
+#1      rs1    0.9152    0.9853    0.9879    0.9677      rs1    0.5041    0.5734
74
+#2     rs10    0.3357     0.116    0.3408    0.1867     rs10    0.9147    0.9294
75
+#3     rs11    0.8004    0.8856    0.2236    0.4642     rs11    0.9262    0.5831
76
+#  var3.dat2 var4.dat2     V1     V2 V3
77
+#1    0.4933    0.6766 0.1864 0.6836  1
78
+#2    0.4104    0.1327 3.2192 1.4166 10
79
+#3    0.8541    0.6228 1.1803 1.9044 11
80
+
81
+}
82
+\keyword{data}
83
+\keyword{merge}
0 84
new file mode 100644
... ...
@@ -0,0 +1,72 @@
1
+\name{lpd.data}
2
+\alias{lpd.data}
3
+\docType{data}
4
+\title{
5
+\bold{GWAS} Meta-analyzed Data of Lipoprotein Cholesterols
6
+}
7
+\description{
8
+lpd.data are standard \bold{GWAS} Meta-analyzed dataset of lipoprotein cholesterols. It was constructed by merging four datasets: \code{Mc_HDL.txt,Mc_LDL.txt, Mc_TC.txt and Mc_TG.txt}.
9
+}
10
+\usage{data("lpd.data")}
11
+\format{
12
+  A data frame with 1609 observations on the following 40 variables.
13
+  \describe{
14
+    \item{\code{SNP_hg18.HDL}}{a character vector}
15
+    \item{\code{SNP_hg19.HDL}}{a character vector}
16
+    \item{\code{rsid.HDL}}{a character vector}
17
+    \item{\code{A1.HDL}}{a character vector}
18
+    \item{\code{A2.HDL}}{a character vector}
19
+    \item{\code{beta.HDL}}{a numeric vector}
20
+    \item{\code{se.HDL}}{a numeric vector}
21
+    \item{\code{N.HDL}}{a numeric vector}
22
+    \item{\code{P.value.HDL}}{a numeric vector}
23
+    \item{\code{Freq.A1.1000G.EUR.HDL}}{a numeric vector}
24
+    \item{\code{SNP_hg18.LDL}}{a character vector}
25
+    \item{\code{SNP_hg19.LDL}}{a character vector}
26
+    \item{\code{rsid.LDL}}{a character vector}
27
+    \item{\code{A1.LDL}}{a character vector}
28
+    \item{\code{A2.LDL}}{a character vector}
29
+    \item{\code{beta.LDL}}{a numeric vector}
30
+    \item{\code{se.LDL}}{a numeric vector}
31
+    \item{\code{N.LDL}}{a numeric vector}
32
+    \item{\code{P.value.LDL}}{a numeric vector}
33
+    \item{\code{Freq.A1.1000G.EUR.LDL}}{a numeric vector}
34
+    \item{\code{SNP_hg18.TG}}{a character vector}
35
+    \item{\code{SNP_hg19.TG}}{a character vector}
36
+    \item{\code{rsid.TG}}{a character vector}
37
+    \item{\code{A1.TG}}{a character vector}
38
+    \item{\code{A2.TG}}{a character vector}
39
+    \item{\code{beta.TG}}{a numeric vector}
40
+    \item{\code{se.TG}}{a numeric vector}
41
+    \item{\code{N.TG}}{a numeric vector}
42
+    \item{\code{P.value.TG}}{a numeric vector}
43
+    \item{\code{Freq.A1.1000G.EUR.TG}}{a numeric vector}
44
+    \item{\code{SNP_hg18.TC}}{a character vector}
45
+    \item{\code{SNP_hg19.TC}}{a character vector}
46
+    \item{\code{rsid.TC}}{a character vector}
47
+    \item{\code{A1.TC}}{a character vector}
48
+    \item{\code{A2.TC}}{a character vector}
49
+    \item{\code{beta.TC}}{a numeric vector}
50
+    \item{\code{se.TC}}{a numeric vector}
51
+    \item{\code{N.TC}}{a numeric vector}
52
+    \item{\code{P.value.TC}}{a numeric vector}
53
+    \item{\code{Freq.A1.1000G.EUR.TC}}{a numeric vector}
54
+  }
55
+}
56
+\details{
57
+These four \bold{GWAS} Meta-analyzed data of lipoprotein cholesterols were downloaded from the following website.
58
+}
59
+\value{
60
+A data sheet consisting of 1609 rows (SNPs) and 40 columns(character vectors such as SNPID and allele, numeric vector such as allele frequency, beta coefficient. See data format above). 		
61
+}
62
+\source{
63
+http://csg.sph.umich.edu//abecasis/public/lipids2013/
64
+}
65
+\references{
66
+Willer CJ et al. Discovery and refinement of loci associated with lipid levels. \emph{Nat. Genet.} 2013. doi:10.1038/ng.2797.
67
+}
68
+\examples{
69
+data(lpd.data)
70
+## maybe str(lpd.data) ; plot(lpd.data) ...
71
+}
72
+\keyword{datasets}
0 73
new file mode 100644
... ...
@@ -0,0 +1,176 @@
1
+\name{mktable}
2
+\alias{mktable}
3
+\title{
4
+Selection of SNPs and Creation of A Standard Table for Mendelian Randomization and Path Analyses
5
+}
6
+\description{
7
+mktable is used to choose SNPs with \code{LG, Pv, Pc} and \code{Pd} and create a standard \verb{SNP} beta table for \code{Mendelian randomization} and \code{path analysis}, see details.
8
+}
9
+\usage{
10
+mktable(cdata, ddata,rt, varname, LG, Pv, Pc, Pd)
11
+}
12
+\arguments{
13
+  \item{cdata}{
14
+causal variable \bold{GWAS} data or \bold{GWAS} meta-analysed data containing \verb{SNP} \code{ID}, \verb{SNP} position, chromosome, allele, allelic frequency, beta value, \code{sd}, sample size, etc.
15
+}
16
+  \item{ddata}{
17
+disease \bold{GWAS} data or \bold{GWAS} meta-analysed data containing \verb{SNP} \code{ID}, \verb{SNP} position, chromosome, allele, allelic frequency, beta value, \code{sd}, sample size, etc.
18
+}
19
+  \item{rt}{
20
+a string that specifies type of returning table. It has two options: \code{rt="beta"} returns beta table or \code{rt="path"} returns \verb{SNP} direct path coefficient table. Default is "beta".
21
+}
22
+  \item{varname}{
23
+a required string set that lists names of undefined causal variables for Mendelian randomization and path analyses. The first name is disease name. Here an example given is \code{varname <-c("CAD","LDL","HD","TG","TC")}.
24
+}
25
+  \item{LG}{
26
+a numeric parameter. \code{LG} is a given minimum interval distnce between \verb{SNP}s and used to choose \verb{SNP}s with. Default \code{LG=1}
27
+}
28
+  \item{Pv}{
29
+a numeric parameter. \code{Pv} is a given maximum p-value that is used to choose \verb{SNP}s. Default Pv=5e-8
30
+}
31
+  \item{Pc}{
32
+a numeric parameter. \code{Pc} is a given proportion of sample size to maximum sample size in causal variable data and used to choose \verb{SNP}s. Default \code{Pc=0.979}
33
+}
34
+  \item{Pd}{
35
+a numeric parameter. Pd is a given proportion of sample size to the maximum sample size in disease data and used to choose \verb{SNP}s. Default \code{Pd =0.979}.
36
+}
37
+}
38
+\details{
39
+The standard \bold{GWAS} cdata set should have the format with following columns: chrn, posit, rsid, \code{a1.x1, a1.x2}, \dots, \code{a1.xn}, \code{freq.x1, freq.x2}, \dots, \code{freq.xn}, \code{beta.x1, beta.x2}, \dots, \code{beta.xn}, \code{sd.x1, sd.x2}, \dots, \code{sd.xn}, \code{pvj}, \code{N.x1, N.x2}, \dots, \code{N.xn}, \code{pcj}. The standard \bold{GWAS} \emph{ddata} set should have\code{hg.d}, \code{SNP.d},\code{a1.d}, \code{freq.d}, \code{beta.d}, \code{N.case},\code{N.ctr},\code{freq.case} where \code{x1, x2}, \dots, \code{xn} are causal variables. See example.
40
+\describe{
41
+\item{beta}{ is a numeric vector that is a column of beta values for regression of SNPs on variable vector \code{X={x1, x2, \dots, xn}}.
42
+}	
43
+\item{freq}{ is a numeric vector that is a column of frequencies of allele 1 with respect to variable vector \code{X={x1, x2, \dots, xn}}.
44
+}
45
+\item{sd}{is a numeric vector that is a column of standard deviations of variable \code{x1,x2}, \dots, \code{xn} specific to \verb{SNP}. Note that here sd is not beta standard deviation. If sd is not specifical to \verb{SNP}s, then sd.xi has the same value for all SNPs in variable \code{i}.
46
+	}
47
+\item{d}{denotes disease.}
48
+\item{N}{is sample size.}
49
+\item{freq.case}{is frequency of disease.}
50
+\item{chrn}{is a numeric vector for chromosome #.}
51
+\item{posit}{is a numeric vector for \verb{SNP} positions on chromosome #. Some time, \verb{chrn} and posit are combined into string vector: \code{hg19/hg18}.
52
+	}
53
+\item{pvj}{is defined as p-value, \code{pcj} and \code{pdj} as proportions of sample size for \verb{SNP} \code{j} to the maximum sample size in the causal variable data and in disease data, respectively.
54
+  }
55
+ }
56
+}
57
+\value{
58
+Return a standard \verb{SNP} beta or \verb{SNP} path table containing \code{m} \verb{SNP}s chosen with \code{LG, Pv, Pc and Pd} and \code{n} variables and disease for \code{Mendelian randomization} and \code{path analysis}.
59
+}
60
+\references{
61
+Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. \emph{Nat Genet} \bold{45}: 1345-1352.
62
+
63
+\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. \emph{PLoS Med} \bold{5}: e177.
64
+
65
+\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166.
66
+
67
+\\Willer, C.J. Schmidt, E.M. Sengupta, S. Peloso, G.M. Gustafsson, S. Kanoni, S. Ganna, A. Chen, J.,Buchkovich, M.L. Mora, S. et al (2013) Discovery and refinement of loci associated with lipid levels. \emph{Nat Genet} \bold{45}: 1274-1283.
68
+}