git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/GMRP@115200 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+Package: GMRP |
|
2 |
+Type: Package |
|
3 |
+Title: GWAS-based Mendelian Randomization and Path Analyses |
|
4 |
+Version: 0.99.2 |
|
5 |
+Date: 2015-11-04 |
|
6 |
+Author: Yuan-De Tan and Dajiang Liu |
|
7 |
+Maintainer: Yuan-De Tan <tanyuande@gmail.com> |
|
8 |
+Description: Perform Mendelian randomization analysis of multiple SNPs |
|
9 |
+ to determine risk factors causing disease of study and to |
|
10 |
+ exclude confounding variabels and perform path analysis to |
|
11 |
+ construct path of risk factors to the disease. |
|
12 |
+License: GPL (>= 2) |
|
13 |
+Depends: R(>= 3.3.0),stats,utils,graphics, grDevices, diagram, plotrix, |
|
14 |
+ base,GenomicRanges |
|
15 |
+Suggests: BiocStyle, BiocGenerics, VariantAnnotation |
|
16 |
+LazyLoad: yes |
|
17 |
+biocViews: Sequencing, Regression, SNP |
|
18 |
+NeedsCompilation: no |
0 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+exportPattern("^[[:alpha:]]+") |
|
2 |
+export(fmerg) |
|
3 |
+export(chrp) |
|
4 |
+export(mktable) |
|
5 |
+export(path) |
|
6 |
+export(pathdiagram) |
|
7 |
+export(pathdiagram2) |
|
8 |
+export(snpposit) |
|
9 |
+export(ucscannot) |
|
10 |
+import(graphics) |
|
11 |
+import(utils) |
|
12 |
+import(diagram) |
|
13 |
+import(plotrix) |
|
14 |
+#import(data.table) |
|
15 |
+import(grDevices) |
|
16 |
+import(stats) |
|
17 |
+import(GenomicRanges) |
0 | 18 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,14 @@ |
1 |
+chrp <- |
|
2 |
+function(hg){ |
|
3 |
+hg<-as.character(hg) |
|
4 |
+HG<-unlist(strsplit(hg,split=":")) |
|
5 |
+n<-length(hg) |
|
6 |
+i<-seq(n) |
|
7 |
+k1<-i*2-1 |
|
8 |
+k2<-i*2 |
|
9 |
+chr<-HG[k1] |
|
10 |
+posit<-as.numeric(HG[k2]) |
|
11 |
+CHR<-unlist(strsplit(chr,split="chr")) |
|
12 |
+chrn<-as.numeric(CHR[k2]) |
|
13 |
+return(cbind(chrn,posit)) |
|
14 |
+} |
0 | 15 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,39 @@ |
1 |
+fmerg <- |
|
2 |
+function(fl1,fl2,ID1,ID2,A="",B="",method="No"){ |
|
3 |
+ |
|
4 |
+try (if(is.null(dim(fl1))|is.null(dim(fl2))) |
|
5 |
+ stop("No data to be merged") |
|
6 |
+ ) |
|
7 |
+fl1<-cbind(fl1[,ID1],as.data.frame(fl1)) |
|
8 |
+colnames(fl1)[1]<-"SNPID" |
|
9 |
+fl2<-cbind(fl2[,ID2],as.data.frame(fl2)) |
|
10 |
+colnames(fl2)[1]<-"SNPID" |
|
11 |
+method<-tolower(method) |
|
12 |
+if(method=="no"|method=="n"){ |
|
13 |
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),sort=TRUE) |
|
14 |
+#merge.dat<-subset(merge.dat,SNPID!=".") |
|
15 |
+} |
|
16 |
+else if(method=="file1"){ |
|
17 |
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),all.x=TRUE,sort=TRUE) |
|
18 |
+} |
|
19 |
+else if(method=="file2"){ |
|
20 |
+merge.dat<-merge(fl1,fl2,by="SNPID",suffixes=c(A,B),all.y=TRUE,sort=TRUE) |
|
21 |
+} |
|
22 |
+else if(method=="all"|method=="a"){ |
|
23 |
+colnames(fl1)[1]<-"SNPID1" |
|
24 |
+colnames(fl2)[2]<-"SNPID2" |
|
25 |
+rownames(fl1)<-fl1$SNPID1 |
|
26 |
+rownames(fl2)<-fl2$SNPID2 |
|
27 |
+pos.all <- unique(c(fl1$SNPID1,fl2$SNPID2)) |
|
28 |
+ind.fl1 <- match(pos.all,fl1$SNPID1) |
|
29 |
+ind.fl2 <- match(pos.all,fl2$SNPID2) |
|
30 |
+fl1.new <- fl1[ind.fl1,] |
|
31 |
+fl2.new<-fl2[ind.fl1,] |
|
32 |
+colnames(fl1.new) <- paste(A,colnames(fl1),sep='.') |
|
33 |
+colnames(fl2.new) <- paste(B,colnames(fl2),sep='.') |
|
34 |
+merge.dat<-cbind(as.data.frame(fl1.new),as.data.frame(fl2.new)) |
|
35 |
+mergdata<-merge.dat[,-"SNPID2"] |
|
36 |
+} |
|
37 |
+mergdata<-merge.dat[,-1] |
|
38 |
+return(mergdata) |
|
39 |
+} |
0 | 40 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,132 @@ |
1 |
+mktable <- |
|
2 |
+function(cdata,ddata,rt="beta",varname,LG=1,Pv=0.00000005,Pc=0.979,Pd=0.979){ |
|
3 |
+ |
|
4 |
+try( |
|
5 |
+ if(is.null(cdata)) |
|
6 |
+ stop("No course data") |
|
7 |
+ ) |
|
8 |
+ |
|
9 |
+try( |
|
10 |
+ if(is.null(ddata)) |
|
11 |
+ stop("No disease data") |
|
12 |
+ ) |
|
13 |
+ |
|
14 |
+cdata<-as.data.frame(cdata) |
|
15 |
+ddtat<-as.data.frame(ddata) |
|
16 |
+ |
|
17 |
+pvj<-cdata$pvj |
|
18 |
+pcj<-cdata$pcj |
|
19 |
+ |
|
20 |
+#print(length(posit)) |
|
21 |
+newcdat<-subset(cdata,(pvj<=Pv&pcj>=Pc)) |
|
22 |
+rsid<-newcdat$rsid |
|
23 |
+SNP.d<-ddata$SNP.d |
|
24 |
+ |
|
25 |
+idx<-is.element(SNP.d,rsid) |
|
26 |
+SNP<-SNP.d[idx] |
|
27 |
+ND<-ddata$N.case[idx]+ddata$N.ctr[idx] |
|
28 |
+pdj<-ND/max(ND) |
|
29 |
+newcdat<-as.data.frame(newcdat) |
|
30 |
+ |
|
31 |
+ddata<-cbind(ddata[idx,],pdj) |
|
32 |
+newcd<-fmerg(fl1=newcdat,fl2=ddata,ID1="rsid",ID2="SNP.d",A="",B="",method="no") |
|
33 |
+sbnewdat<-subset(newcd,pdj>=Pd) |
|
34 |
+sbnewdat<-as.data.frame(sbnewdat) |
|
35 |
+ |
|
36 |
+if(LG>2){ |
|
37 |
+ |
|
38 |
+sbnewdat1<-sbnewdat[with(sbnewdat,order(chrn,posit)),] |
|
39 |
+ |
|
40 |
+chrn<-sbnewdat1$chrn |
|
41 |
+chrn.vect<-unique(chrn) |
|
42 |
+#ch<-chrn.vect[1] |
|
43 |
+posit<-sbnewdat1$posit |
|
44 |
+ |
|
45 |
+#distance<-posit[max(ix)]-posit[min(ix)] |
|
46 |
+distance<-rep(NA,length(chrn.vect)) |
|
47 |
+posit.new<-rep(NA,length(posit)) |
|
48 |
+for(ch in chrn.vect){ |
|
49 |
+ # print(ch) |
|
50 |
+ ix<-which(chrn==ch) |
|
51 |
+ # print(ix) |
|
52 |
+ distance[ch]<-round((posit[max(ix)]-posit[min(ix)])/LG,0) |
|
53 |
+ if(is.na(distance[ch])==FALSE){ |
|
54 |
+ if(distance[ch]<=1){ |
|
55 |
+ posit.new[min(ix)]<-posit[min(ix)] |
|
56 |
+ }else{ |
|
57 |
+ |
|
58 |
+ for(i in min(ix):(max(ix)-2)){ |
|
59 |
+ if((posit[(i+1)]-posit[i]>LG)&(posit[(i+2)]-posit[(i+1)])>LG){ |
|
60 |
+ posit.new[i]<-posit[i] |
|
61 |
+ posit.new[(i+1)]<-posit[(i+1)] |
|
62 |
+ } |
|
63 |
+ } |
|
64 |
+ } |
|
65 |
+ } |
|
66 |
+ } |
|
67 |
+# check if posit.new is in posit, T for yes and F for no |
|
68 |
+ indx<-is.element(posit,posit.new) |
|
69 |
+ sbnewdat2<-sbnewdat1 |
|
70 |
+ sbnewdat3<-sbnewdat2[indx,] |
|
71 |
+ }else{ |
|
72 |
+ sbnewdat3<-sbnewdat |
|
73 |
+ |
|
74 |
+ } |
|
75 |
+ |
|
76 |
+ dim(sbnewdat3) |
|
77 |
+sbnewdat3<-data.frame(sbnewdat3) |
|
78 |
+chr<-sbnewdat3$chrn |
|
79 |
+nSNP<-sbnewdat3$rsid |
|
80 |
+nposit<-sbnewdat3$posit |
|
81 |
+ |
|
82 |
+freqcase<-sbnewdat3$freq.case |
|
83 |
+ #print(is.numeric(freqcase)) |
|
84 |
+ ln<-length(varname) |
|
85 |
+ rn<-nrow(sbnewdat3) |
|
86 |
+ betay<-alle<-freq<-sd<-matrix(NA,nrow=rn,ncol=ln) |
|
87 |
+ betay[,1]<-sbnewdat3$beta.d # disease beta value vector of SNPs |
|
88 |
+ alle[,1]<-sbnewdat3$a1.d |
|
89 |
+ freq[,1]<-sbnewdat3$freq.d |
|
90 |
+ sd[,1]<-sqrt(freqcase*(1-freqcase)) |
|
91 |
+ for(i in 2:ln){ |
|
92 |
+ k<-(2+2*(ln-1)+i) |
|
93 |
+ betay[,i]<-sbnewdat3[,k] |
|
94 |
+ k1<-(2+(ln-1)+i) |
|
95 |
+ freq[,i]<-sbnewdat3[,k1] |
|
96 |
+ k2<-2+i |
|
97 |
+ alle[,i]<-sbnewdat3[,k2] |
|
98 |
+ k3<-(2+3*(ln-1)+i) |
|
99 |
+ sd[,i]<-sbnewdat3[,k3] |
|
100 |
+ } |
|
101 |
+ #print(head(freq)) |
|
102 |
+ rn<-nrow(betay) |
|
103 |
+ for(i in 2:ln){ |
|
104 |
+ for(j in 1:rn){ |
|
105 |
+ if(identical(alle[j,1],alle[j,i])==FALSE){ |
|
106 |
+ betay[j,i]<--betay[j,i] |
|
107 |
+ } |
|
108 |
+ } |
|
109 |
+ } |
|
110 |
+ |
|
111 |
+colnames(betay)<-varname |
|
112 |
+ |
|
113 |
+betax<--matrix(NA,nrow=rn,ncol=ln) |
|
114 |
+idx<-seq_len(ln) |
|
115 |
+betax[,idx]<-betay[,idx]*sqrt((freq[,idx]*(1-freq[,idx]))/sd[,idx]) |
|
116 |
+ |
|
117 |
+ betay<-cbind(nposit,betay) |
|
118 |
+ betay<-cbind(chr,nSNP,as.data.frame(betay)) |
|
119 |
+ |
|
120 |
+ colnames(betay)[1:3]<-c("chr","SNP","posit") |
|
121 |
+ colnames(betax)<-varname |
|
122 |
+ betax<-cbind(chr,nSNP,nposit,betax) |
|
123 |
+ colnames(betax)[1:3]<-c("chr","SNP","posit") |
|
124 |
+ |
|
125 |
+rt<-tolower(rt) |
|
126 |
+if (rt=="beta"|rt=="b"){ |
|
127 |
+ return(betay) |
|
128 |
+ }else if (rt=="path"|rt=="p"){ |
|
129 |
+ return(betax) |
|
130 |
+ } |
|
131 |
+ |
|
132 |
+ } |
0 | 133 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,86 @@ |
1 |
+path <- |
|
2 |
+function(betav,model,outcome){ |
|
3 |
+betav<-as.data.frame(betav) |
|
4 |
+rn<-nrow(betav) # number of beta values in each beta variable |
|
5 |
+cn<-ncol(betav) |
|
6 |
+#betav<-DataFrame(betav) |
|
7 |
+fit<-summary(lm(model,data=betav)) |
|
8 |
+ |
|
9 |
+print(fit) |
|
10 |
+corr<-cor(betav) |
|
11 |
+cvar<-cov(betav) |
|
12 |
+ |
|
13 |
+beta.xx<-rep(0,(cn-1)) |
|
14 |
+ |
|
15 |
+for(i in 2:cn){ |
|
16 |
+beta.xx[i-1]<-fit$coef[i,1] |
|
17 |
+} |
|
18 |
+ |
|
19 |
+varx<-rep(0,(cn-1)) |
|
20 |
+vary<-cvar[1,1] |
|
21 |
+corrx<-corr[-1,-1] |
|
22 |
+cvarx<-cvar[-1,-1] |
|
23 |
+ |
|
24 |
+for(i in 1:(cn-1)){ |
|
25 |
+varx[i]<-cvarx[i,i] |
|
26 |
+} |
|
27 |
+ |
|
28 |
+py<-rep(0,(cn-1)) # direction path coefficients |
|
29 |
+ |
|
30 |
+for(i in 1:(cn-1)){ |
|
31 |
+py[i]<-beta.xx[i]*(varx[i]/vary)^0.5 |
|
32 |
+#f2[i]<-(varx[i]/vary)^0.5 |
|
33 |
+} |
|
34 |
+print(py) |
|
35 |
+#print(vratio) |
|
36 |
+path<-matrix(0,(cn-1),(cn-1)) |
|
37 |
+for(i in 1:(cn-1)){ |
|
38 |
+ for(j in 1:(cn-1)){ |
|
39 |
+path[i,j]<-corrx[i,j]*py[j] |
|
40 |
+ } |
|
41 |
+} |
|
42 |
+pe<-fit$sigma/sqrt(vary) |
|
43 |
+ |
|
44 |
+colnames(path)<-colnames(corrx) |
|
45 |
+rownames(path)<-rownames(corrx) |
|
46 |
+ |
|
47 |
+corr.outcome<-apply(path,1,sum) |
|
48 |
+path<-cbind(path,corr.outcome) |
|
49 |
+pn<-ncol(path) |
|
50 |
+beta<-fit$coef |
|
51 |
+se<-beta[,2] |
|
52 |
+#betacol<-matrix(NA,nrow=nrow(beta),ncol=1) |
|
53 |
+betar<-round(beta,6) |
|
54 |
+betar[,ncol(beta)]<-beta[,ncol(beta)] #p-values without round treatment |
|
55 |
+#print(betar) |
|
56 |
+corrname<-rownames(corr) |
|
57 |
+colnames(path)[pn]<-outcome |
|
58 |
+ |
|
59 |
+pathname<-colnames(path) |
|
60 |
+ |
|
61 |
+corr<-round(corr,6) |
|
62 |
+path<-round(path,6) |
|
63 |
+#R2<-rep(NA,cn) |
|
64 |
+#R2[1]<-fit$r.square |
|
65 |
+R_square<-rep(NA,ncol(path)) |
|
66 |
+Pe<-rep(NA,ncol(path)) |
|
67 |
+Pe[1]<-pe |
|
68 |
+#path<-rbind(path,R2) |
|
69 |
+R_square[1]<-round(fit$r.square,6) |
|
70 |
+Path_value<-rep(NA,length(py)+1) |
|
71 |
+Path_value[1:length(py)]<-round(py,6) |
|
72 |
+t_value<-p_value<-rep(NA,(1+length(py))) |
|
73 |
+for(i in 1:length(py)){ |
|
74 |
+t_value[i]<-py[i]/se[i] |
|
75 |
+} |
|
76 |
+for(i in 1:length(py)){ |
|
77 |
+p_value[i]<-2*pt(-abs(t_value[i]),df=rn-cn) |
|
78 |
+} |
|
79 |
+ |
|
80 |
+betax<-matrix(NA,nrow=nrow(betar),ncol=(cn-ncol(beta))) |
|
81 |
+betar<-cbind(betar,betax) |
|
82 |
+ |
|
83 |
+result<-rbind(betar,corrname,corr,pathname,path,Path_value,Pe,R_square) |
|
84 |
+return(noquote(result)) |
|
85 |
+ |
|
86 |
+} |
0 | 87 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,144 @@ |
1 |
+pathdiagram <- |
|
2 |
+function(pathdata,disease,R2,range){ |
|
3 |
+ |
|
4 |
+#library(diagram) |
|
5 |
+ |
|
6 |
+pathcad<-pathdata |
|
7 |
+ |
|
8 |
+#disease<-"CAD" |
|
9 |
+#range<-c(2:5) |
|
10 |
+pathname<-toupper(colnames(pathcad)[range]) |
|
11 |
+ |
|
12 |
+rownames(pathcad)<-pathname |
|
13 |
+ |
|
14 |
+corr.xx<-pathcad[,range] |
|
15 |
+path_value<-pathcad$path |
|
16 |
+residual<-sqrt(1-R2) |
|
17 |
+ |
|
18 |
+par(mar = c(3, 1, 1, 1)) |
|
19 |
+openplotmat() |
|
20 |
+shdv<-0.005 |
|
21 |
+colorset<-c(colors()[645],colors()[642],colors()[503],colors()[373],colors()[92],colors()[134],colors()[139],colors()[39],colors()[20],colors()[44],colors()[71],colors()[123],colors()[128],colors()[132]) |
|
22 |
+cl<-length(colorset) |
|
23 |
+ |
|
24 |
+mycolor<-rep(NA,cl) |
|
25 |
+mycolor<-colorset[1:cl] |
|
26 |
+mypath<-cbind(seq(1:length(path_value)),path_value) |
|
27 |
+mypath<-mypath[order(-path_value),] |
|
28 |
+indx<-mypath[,1] |
|
29 |
+mcolor<-rep(NA,(1+length(path_value))) |
|
30 |
+ |
|
31 |
+pl<-length(path_value) |
|
32 |
+for(i in 1:pl){ |
|
33 |
+ if(mypath[i,2]<0){ |
|
34 |
+ k=cl+1-i |
|
35 |
+ }else if(mypath[i,2]>0&mypath[i,2]<0.15){ |
|
36 |
+ k=cl-5-i |
|
37 |
+ }else{ |
|
38 |
+k=i |
|
39 |
+ } |
|
40 |
+mcolor[(indx[i]+1)]<-mycolor[k] |
|
41 |
+} |
|
42 |
+mcolor[1]<-colors()[82] |
|
43 |
+ |
|
44 |
+# Get plot coordinates |
|
45 |
+elpos <- coordinates(c(2, length(path_value))) |
|
46 |
+# adjust coordinates for Residual |
|
47 |
+elpos[2,1] <- abs((elpos[1,1]+elpos[1,2])/2) |
|
48 |
+ |
|
49 |
+# Specify Arrow positions |
|
50 |
+#1 Residual to Dependent |
|
51 |
+ft1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2)) |
|
52 |
+#2 Independent to dependent |
|
53 |
+ft2 <- matrix(ncol=2, byrow = FALSE, |
|
54 |
+data= c(seq((2+length(path_value)))[3:(length(path_value)+2)], rep(2, length(path_value)))) |
|
55 |
+ |
|
56 |
+#3 For cor.x |
|
57 |
+fromto_CU <- t(combn(seq((2+length(path_value)))[3:(length(path_value)+2)],2)) |
|
58 |
+#4 For path distances |
|
59 |
+fromto_ST <- rbind(ft1,ft2) |
|
60 |
+ |
|
61 |
+# Plot Path distance arrows |
|
62 |
+nr <- nrow(fromto_ST) |
|
63 |
+ |
|
64 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
65 |
+ |
|
66 |
+for (i in 1:nr){ |
|
67 |
+k=i+6 |
|
68 |
+ arrpos[i, ] <-straightarrow (to = elpos[fromto_ST[i, 2], ], |
|
69 |
+ from = elpos[fromto_ST[i, 1], ], |
|
70 |
+ lcol=mcolor[i], lwd = 3, arr.pos = 0.7, arr.length = 0.5) |
|
71 |
+ } |
|
72 |
+ |
|
73 |
+#Label residual path distance arrow |
|
74 |
+text(arrpos[1, 1]-0.05, arrpos[1, 2] + 0.03, |
|
75 |
+ paste("Pe", " = ", round(residual, 3), sep=""), cex=1) |
|
76 |
+ |
|
77 |
+#Label path distance arrows |
|
78 |
+nr <- nrow(arrpos) |
|
79 |
+for(i in 2:nr){ |
|
80 |
+ text(arrpos[i, 1]+(i*0.7-2)*0.14, arrpos[i, 2]-0.18,paste("P","(",pathname[i-1],")"," = ", round(path_value[i-1], 3), sep=""),cex=1,col=mcolor[i]) |
|
81 |
+ |
|
82 |
+} |
|
83 |
+ |
|
84 |
+# Plot correlation arrows direction 1 |
|
85 |
+nr <- nrow(fromto_CU) |
|
86 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
87 |
+for (i in 1:nr){ |
|
88 |
+#k=i+6 |
|
89 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 2], ], |
|
90 |
+ from = elpos[fromto_CU[i, 1], ], |
|
91 |
+ lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = 0.3) |
|
92 |
+} |
|
93 |
+ |
|
94 |
+# Plot correlation arrows - direction 2 |
|
95 |
+nr <- nrow(fromto_CU) |
|
96 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
97 |
+for (i in 1:nr){ |
|
98 |
+#k=i+6 |
|
99 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ], |
|
100 |
+ from = elpos[fromto_CU[i, 2], ], |
|
101 |
+ lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = -0.3) |
|
102 |
+} |
|
103 |
+# Create combinations of cor.x for labelling rxy in correlation arrows |
|
104 |
+rcomb <- as.data.frame(t(combn(seq(nrow(corr.xx)),2))) |
|
105 |
+rcomb <- paste(rcomb$V1,rcomb$V2, sep="") |
|
106 |
+ |
|
107 |
+# Label correlation arrows |
|
108 |
+nr <- nrow(fromto_CU) |
|
109 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
110 |
+for (i in 1:nr) |
|
111 |
+# k=i+6 |
|
112 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ], |
|
113 |
+ from = elpos[fromto_CU[i, 2], ], |
|
114 |
+ lwd = 2, arr.pos = 0.5, lcol = "transparent", arr.length = 0.5, curve = -0.3) |
|
115 |
+ |
|
116 |
+nr <- nrow(arrpos) |
|
117 |
+for(i in 1:nr){ |
|
118 |
+ text(arrpos[i, 1], arrpos[i, 2] + 0.03, |
|
119 |
+ paste("r", rcomb[i]," = ", round(as.dist(corr.xx)[i], 3), sep=""), cex=1) |
|
120 |
+} |
|
121 |
+ |
|
122 |
+# Label Residual |
|
123 |
+#textrect (elpos[1,], 0.09, 0.03,lab = "Residual", box.col =colors()[(75+7)], |
|
124 |
+# shadow.col = "grey", shadow.size = 0.01, cex = 1) |
|
125 |
+ |
|
126 |
+textround(elpos[1,], 0.02, 0.04,lab = "Residual", box.col =colors()[82], |
|
127 |
+ shadow.col = "grey", shadow.size = shdv, cex = 1) |
|
128 |
+ |
|
129 |
+# Label Dependent |
|
130 |
+#textrect (elpos[2,], 0.09, 0.03,lab = "AUDPC", box.col = "red", |
|
131 |
+# shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
132 |
+ |
|
133 |
+textround (elpos[2,], 0.02, 0.04,lab = disease, box.col = "red", |
|
134 |
+ shadow.col = "grey", shadow.size = shdv, cex = 1) |
|
135 |
+ |
|
136 |
+# Label independents |
|
137 |
+nr <- nrow(elpos) |
|
138 |
+for (i in 3:nr){ |
|
139 |
+ k<-i+5 |
|
140 |
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1) |
|
141 |
+ |
|
142 |
+ textround (elpos[i,], 0.02, 0.04,lab = pathname[i-2], box.col =mcolor[i-1],shadow.col = "grey", shadow.size = shdv, cex = 1) |
|
143 |
+} |
|
144 |
+} |
0 | 145 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,214 @@ |
1 |
+pathdiagram2 <- |
|
2 |
+function(pathD,pathO,rangeD,rangeO,disease,R2D,R2O){ |
|
3 |
+#require(agricolae) |
|
4 |
+#require(Hmisc) |
|
5 |
+#library(diagram) |
|
6 |
+ |
|
7 |
+pathcad<-pathD |
|
8 |
+pathtc<-pathO |
|
9 |
+#rangeD<-c(2:5) |
|
10 |
+#rangeO<-c(2:4) |
|
11 |
+#R2D<-0.241792 |
|
12 |
+#R2O<-0.965284 |
|
13 |
+cadpathname<-toupper(colnames(pathcad)[rangeD]) |
|
14 |
+ |
|
15 |
+rownames(pathcad)<-cadpathname |
|
16 |
+rownametc<-toupper(colnames(pathtc)[rangeO]) |
|
17 |
+for(i in 1:length(rownametc)) |
|
18 |
+if(identical(cadpathname[i],rownametc[i])==FALSE){ |
|
19 |
+ stop("no match between child and parent causal variables.") |
|
20 |
+} |
|
21 |
+if(R2D==""|R2O==""){ |
|
22 |
+stop("R2D and R2O are required") |
|
23 |
+} |
|
24 |
+pathtc<-data.frame(pathtc) |
|
25 |
+pathcad<-data.frame(pathD) |
|
26 |
+#rownames(pathtc)<-rownametc |
|
27 |
+corr.tc<-pathtc[,rangeO] |
|
28 |
+colnames(corr.tc)<-rownametc |
|
29 |
+rownames(corr.tc)<-rownametc |
|
30 |
+path.tc<-pathtc$path |
|
31 |
+path.cad<-pathcad$path |
|
32 |
+residual.tc<-sqrt(1-R2O) |
|
33 |
+residual.cad<-sqrt(1-R2D) |
|
34 |
+ |
|
35 |
+par(mar = c(1, 1, 1, 1)) |
|
36 |
+openplotmat() |
|
37 |
+# Get plot coordinates |
|
38 |
+elpos <- coordinates(c(2, length(path.tc)))*0.7 |
|
39 |
+ |
|
40 |
+# adjust coordinates for Residual |
|
41 |
+elpos[2,1] <- abs((elpos[1,1]+elpos[1,2])/2) |
|
42 |
+ |
|
43 |
+# Specify Arrow positions |
|
44 |
+#1 Residual to Dependent |
|
45 |
+ft1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2)) |
|
46 |
+#2 Independent to dependent |
|
47 |
+ft2 <- matrix(ncol=2, byrow = FALSE, |
|
48 |
+data= c(seq((2+length(path.tc)))[3:(length(path.tc)+2)], rep(2, length(path.tc)))) |
|
49 |
+#3 For cor.x |
|
50 |
+fromto_CU <- t(combn(seq((2+length(path.tc)))[3:(length(path.tc)+2)],2)) |
|
51 |
+#4 For path distances |
|
52 |
+fromto_ST <- rbind(ft1,ft2) |
|
53 |
+ |
|
54 |
+# Plot Path distance arrows |
|
55 |
+nr <- nrow(fromto_ST) |
|
56 |
+ |
|
57 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
58 |
+ |
|
59 |
+for (i in 1:nr){ |
|
60 |
+k=i+6 |
|
61 |
+ arrpos[i, ] <-straightarrow (to = elpos[fromto_ST[i, 2], ], |
|
62 |
+ from = elpos[fromto_ST[i, 1], ], |
|
63 |
+ lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.6, arr.length = 0.5) |
|
64 |
+ } |
|
65 |
+ |
|
66 |
+#Label residual path distance arrow |
|
67 |
+text(arrpos[1, 1]-0.05, arrpos[1, 2] + 0.02, |
|
68 |
+ paste("Pe", " = ", round(residual.tc, 3), sep=""), cex=0.8) |
|
69 |
+ |
|
70 |
+#Label path distance arrows |
|
71 |
+nr <- nrow(arrpos) |
|
72 |
+for(i in 2:nr){ |
|
73 |
+ k=i+6 |
|
74 |
+text(arrpos[i, 1]+(i-2)*0.06, arrpos[i, 2]-0.05,paste("P","(",tolower(rownametc[i-1]),")"," = ", round(path.tc[i-1], 3), sep=""),cex=0.8,col=colors()[(75+k)]) |
|
75 |
+ |
|
76 |
+} |
|
77 |
+ |
|
78 |
+# Plot correlation arrows direction 1 |
|
79 |
+nr <- nrow(fromto_CU) |
|
80 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
81 |
+for (i in 1:nr){ |
|
82 |
+#k=i+6 |
|
83 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 2], ], |
|
84 |
+ from = elpos[fromto_CU[i, 1], ], |
|
85 |
+ lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = 0.35) |
|
86 |
+} |
|
87 |
+ |
|
88 |
+# Plot correlation arrows - direction 2 |
|
89 |
+for (i in 1:nr){ |
|
90 |
+#k=i+6 |
|
91 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ], |
|
92 |
+ from = elpos[fromto_CU[i, 2], ], |
|
93 |
+ lwd = 2, arr.pos = 0.8, arr.length = 0.5, curve = -0.35) |
|
94 |
+} |
|
95 |
+ |
|
96 |
+# Create combinations of cor.x for labelling rxy in correlation arrows |
|
97 |
+rcomb <- as.data.frame(t(combn(seq(nrow(corr.tc)),2))) |
|
98 |
+rcomb <- paste(rcomb$V1,rcomb$V2, sep="") |
|
99 |
+ |
|
100 |
+# Label correlation arrows |
|
101 |
+nr <- nrow(fromto_CU) |
|
102 |
+arrpos <- matrix(ncol = 2, nrow = nr) |
|
103 |
+for (i in 1:nr) |
|
104 |
+# k=i+6 |
|
105 |
+ arrpos[i, ] <- curvedarrow (to = elpos[fromto_CU[i, 1], ], |
|
106 |
+ from = elpos[fromto_CU[i, 2], ], |
|
107 |
+ lwd = 2, arr.pos = 0.5, lcol = "transparent", arr.length = 0.5, curve = -0.35) |
|
108 |
+ |
|
109 |
+nr <- nrow(arrpos) |
|
110 |
+for(i in 1:nr){ |
|
111 |
+ text(arrpos[i, 1], arrpos[i, 2] + 0.03, |
|
112 |
+ paste("r", rcomb[i]," = ", round(as.dist(corr.tc)[i], 2), sep=""), cex=1) |
|
113 |
+} |
|
114 |
+ |
|
115 |
+# Label Residual |
|
116 |
+#textrect (elpos[1,], 0.09, 0.03,lab = "Residual", box.col =, |
|
117 |
+# shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
118 |
+ |
|
119 |
+textround(c(elpos[1,1]-0.08,elpos[1,2]), 0.02, 0.04,lab = "Residual", box.col =colors()[(75+7)], |
|
120 |
+ shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
121 |
+ |
|
122 |
+# Label Dependent |
|
123 |
+#textrect (elpos[2,], 0.09, 0.03,lab = "AUDPC", box.col = "red", |
|
124 |
+# shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
125 |
+ |
|
126 |
+# the second outcome variable boxround |
|
127 |
+textround (elpos[2,], 0.02, 0.04,lab = cadpathname[length(cadpathname)], box.col = colors()[122], |
|
128 |
+ shadow.col = "grey", shadow.size = 0.005, cex = 1.3) |
|
129 |
+ |
|
130 |
+# Label independents |
|
131 |
+nr <- nrow(elpos) |
|
132 |
+for (i in 3:nr){ |
|
133 |
+ k<-i+5 |
|
134 |
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1) |
|
135 |
+ |
|
136 |
+ textround (elpos[i,], 0.02, 0.04,lab = colnames(corr.tc)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
137 |
+} |
|
138 |
+ |
|
139 |
+elpos1 <- coordinates(c(2, length(path.cad)))*1.15 |
|
140 |
+ |
|
141 |
+# adjust coordinates for Residual |
|
142 |
+elpos1[2,1] <- abs((elpos1[1,1]+elpos1[1,2])*0.75) |
|
143 |
+ |
|
144 |
+# Specify Arrow positions |
|
145 |
+#1 Residual to Dependent |
|
146 |
+fit1 <- matrix(ncol = 2, byrow = TRUE, data = c(1, 2)) |
|
147 |
+#2 Independent to dependent |
|
148 |
+fit2 <- matrix(ncol=2, byrow = FALSE, |
|
149 |
+data= c(seq((2+length(path.cad)))[3:(length(path.cad)+2)], rep(2, length(path.cad)))) |
|
150 |
+#3 For cor.x |
|
151 |
+fromto_CU1 <- t(combn(seq((2+length(path.cad)))[3:(length(path.cad)+2)],2)) |
|
152 |
+#4 For path distances |
|
153 |
+fromto_ST1 <- rbind(fit1,fit2) |
|
154 |
+ |
|
155 |
+# Plot Path distance arrows |
|
156 |
+nr1 <- nrow(fromto_ST1)-1 |
|
157 |
+ |
|
158 |
+arrpos1 <- matrix(ncol = 2, nrow = nr1) |
|
159 |
+ |
|
160 |
+arrpos1[1, ] <-straightarrow (to = c(elpos1[fromto_ST1[1, 2], 1]-0.5,elpos1[fromto_ST1[1, 2], 2]), |
|
161 |
+ from = c(elpos1[fromto_ST1[1, 1], 1]-0.1,elpos1[fromto_ST1[1, 1], 2]), |
|
162 |
+ lcol=colors()[(75+7)], lwd = 2, arr.pos = 0.5, arr.length = 0.5) |
|
163 |
+ |
|
164 |
+ # path line of the second outcome (TC) to the fist outcome variable (disease) |
|
165 |
+arrpos1[2, ] <-straightarrow (to = c(elpos1[fromto_ST1[1, 2], 1]-0.5,elpos1[fromto_ST1[1, 2], 2]), |
|
166 |
+ from = c(elpos[fromto_ST[1, 2], 1],elpos[fromto_ST[1, 2], 2]+0.03), |
|
167 |
+ lcol=colors()[122], lwd = 2, arr.pos = 0.6, arr.length = 0.7) |
|
168 |
+ |
|
169 |
+for (i in 2:nr1){ |
|
170 |
+if(i==3){ |
|
171 |
+k=i+6 |
|
172 |
+ arrpos1[i, ] <-curvedarrow(to = c(elpos1[fromto_ST1[i, 2], 1]-0.5,elpos1[fromto_ST1[i, 2], 2]), |
|
173 |
+ from = elpos[fromto_ST[i, 1], ], |
|
174 |
+ lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.7, arr.length = 0.5, curve = 0.05) |
|
175 |
+}else{ |
|
176 |
+k=i+6 |
|
177 |
+ arrpos1[i, ] <-straightarrow (to = c(elpos1[fromto_ST1[i, 2], 1]-0.5,elpos1[fromto_ST1[i, 2], 2]), |
|
178 |
+ from = elpos[fromto_ST[i, 1], ], |
|
179 |
+ lcol=colors()[(75+k)], lwd = 2, arr.pos = 0.7, arr.length = 0.5) |
|
180 |
+ } |
|
181 |
+} |
|
182 |
+#Label residual path distance arrow |
|
183 |
+ |
|
184 |
+text(arrpos1[1, 1]-0.04, arrpos1[1, 2] + 0.03, |
|
185 |
+ paste("Pe", " = ", round(residual.cad, 3), sep=""), cex=0.8) |
|
186 |
+ |
|
187 |
+# residual coefficient to the firest outcome(disease) |
|
188 |
+textround(c(elpos1[1,1]-0.18,elpos1[1,2]), 0.02, 0.04,lab = "Residual", box.col =colors()[(75+7)], |
|
189 |
+ shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
190 |
+ |
|
191 |
+textround (c(elpos1[2,1]-0.5,elpos1[2,2]), 0.02, 0.04,lab = disease, box.col = "red", |
|
192 |
+ shadow.col = "grey", shadow.size = 0.005, cex = 1) |
|
193 |
+ |
|
194 |
+ # path coefficient of second outcome variable (such as TC)to first outcome variable(disease) |
|
195 |
+ text(arrpos1[2, 1]+0.1, arrpos1[2, 2]+0.03, |
|
196 |
+ paste("P","(", cadpathname[length(cadpathname)],")", " = ", round(path.cad[length(path.cad)], 3), sep=""), cex=0.8,col=colors()[122]) |
|
197 |
+ |
|
198 |
+nr1 <- nrow(arrpos1) |
|
199 |
+for(i in 2:nr1){ |
|
200 |
+ k=i+6 |
|
201 |
+text(arrpos1[i, 1]+(i*0.8-2)*0.09, arrpos1[i, 2]-(i-1)*0.05,paste("P","(", cadpathname[i-1],")"," = ", round(path.cad[i-1], 3), sep=""),cex=1,col=colors()[(75+k)]) |
|
202 |
+ |
|
203 |
+} |
|
204 |
+ |
|
205 |
+# recover the color and labs |
|
206 |
+nr <- nrow(elpos) |
|
207 |
+for (i in 3:nr){ |
|
208 |
+ k<-i+5 |
|
209 |
+ # textrect (elpos[i,], 0.09, 0.03,lab = colnames(x)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", #shadow.size = 0.005, cex = 1) |
|
210 |
+ |
|
211 |
+ textround (elpos[i,], 0.02, 0.04,lab = colnames(corr.tc)[i-2], box.col =colors()[(75+k)],shadow.col = "grey", shadow.size = 0.005, cex = 1.3) |
|
212 |
+} |
|
213 |
+ |
|
214 |
+} |
0 | 215 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,108 @@ |
1 |
+snpposit <- |
|
2 |
+function(SNPdata,SNP_hg19="chr",X="no",LG= 10,main="A",maxd=2000){ |
|
3 |
+SNPdata<-as.data.frame(SNPdata) |
|
4 |
+if(SNP_hg19!="chr"){ |
|
5 |
+hg19<-as.character(SNPdata$SNP_hg19) |
|
6 |
+#print(hg19[1:10]) |
|
7 |
+ |
|
8 |
+chromp<-chrp(hg19) |
|
9 |
+#print(chromp[1:6,]) |
|
10 |
+}else{ |
|
11 |
+chr<-SNPdata$chr |
|
12 |
+if(length(chr)==0){ |
|
13 |
+stop("no data") |
|
14 |
+} |
|
15 |
+ |
|
16 |
+SNPdata<-SNPdata[with(SNPdata,order(chr,posit)),] |
|
17 |
+chrn<-as.numeric(SNPdata$chr) |
|
18 |
+posit<-as.numeric(SNPdata$posit) |
|
19 |
+chromp<-cbind(chrn,posit) |
|
20 |
+ |
|
21 |
+#print(CHR) |
|
22 |
+} |
|
23 |
+chr<-chromp[,1] |
|
24 |
+CHR<-unique(chr) |
|
25 |
+nch<-length(CHR) # number of chromosomes |
|
26 |
+meanl<-m<-md<-rep(NA,nch) |
|
27 |
+ |
|
28 |
+#calculate SNP number on each chromosome |
|
29 |
+nsnpchr<-function(x,LG){ |
|
30 |
+y<-as.numeric(x[,2]) |
|
31 |
+#print(y) |
|
32 |
+nr<-length(y) |
|
33 |
+y<-sort(y) |
|
34 |
+m<-0 |
|
35 |
+ix<-seq(nr-1) |
|
36 |
+iy<-seq(from=2,to=nr) |
|
37 |
+m<-length(which(y[iy]-y[ix]>=LG)) |
|
38 |
+return(m) |
|
39 |
+} |
|
40 |
+ |
|
41 |
+#averaged distance between SNPs on each chromosome |
|
42 |
+mdist<-function(x,LG){ |
|
43 |
+x<-as.data.frame(x) |
|
44 |
+y<-x[,2] |
|
45 |
+#print(y) |
|
46 |
+nr<-length(y) |
|
47 |
+ y<-sort(y) |
|
48 |
+ mdist<-(y[nr]-y[1])/((nr-1)*LG) |
|
49 |
+return(mdist) |
|
50 |
+ |
|
51 |
+} |
|
52 |
+ |
|
53 |
+#calculate average distance between SNPs and SNP number on each chromosome |
|
54 |
+m<-md<-snpn<-rep(0,nch) |
|
55 |
+for(i in 1:nch){ |
|
56 |
+chrmp<-subset(chromp,chrn==i) |
|
57 |
+snpn[i]<-nrow(chrmp) |
|
58 |
+if(snpn[i]!=0){ |
|
59 |
+m[i]<-nsnpchr(subset(chromp,chrn==i),LG) |
|
60 |
+md[i]<-mdist(subset(chromp,chrn==i),LG) |
|
61 |
+ } |
|
62 |
+} |
|
63 |
+ |
|
64 |
+md[which(md>=maxd)]<-maxd |
|
65 |
+md[which(is.na(md))]<-0 |
|
66 |
+#print(length(md)) |
|
67 |
+#print(length(CHR)) |
|
68 |
+colors<-c("red","lightsalmon","lightseagreen","mediumspringgreen","mediumorchid4","mediumpurple4","lightskyblue4","mediumseagreen","deeppink", |
|
69 |
+"mediumvioletred","firebrick","magenta","navyblue","lightsteelblue2","navajowhite","blue","maroon4","orange2","purple","darkorchid1", |
|
70 |
+"black","cyan") |
|
71 |
+ |
|
72 |
+chrcol<-rep(NA,nch) |
|
73 |
+for(i in 1:nch){ |
|
74 |
+ chrcol[i]<-colors[i] |
|
75 |
+} |
|
76 |
+if(X=="yes"){ |
|
77 |
+CHR[nch]<-"X" |
|
78 |
+} |
|
79 |
+#print(CHR) |
|
80 |
+par(oma=c(2,1,2,1)) |
|
81 |
+bp<-barplot(md,width=2,ylab="mean length (kbp) of interval between SNPs", names.arg =paste("chr",CHR,sep=""),col=chrcol,ylim=c(0,(maxd+250)), |
|
82 |
+main=main,cex.lab=1.5,cex.names=1,cex.main=2.5,cex.axis=1.5) |
|
83 |
+ |
|
84 |
+# space between lab and column |
|
85 |
+YY<-rep(NA,nch) |
|
86 |
+for(i in 1:nch){ |
|
87 |
+ if(!is.na(md[i])){ |
|
88 |
+if(md[i]>=1000){ |
|
89 |
+YY[i]<-md[i]*1.05 |
|
90 |
+ }else if(md[i]<1000&md[i]>=500){ |
|
91 |
+ YY[i]<-md[i]*1.08 |
|
92 |
+ }else if(md[i]<500&md[i]>=200){ |
|
93 |
+ YY[i]<-md[i]*1.2 |
|
94 |
+ }else if(md[i]<200&md[i]>=100){ |
|
95 |
+ YY[i]<-(md[i]*1.8) |
|
96 |
+ }else if(md[i]<100&md[i]>=50){ |
|
97 |
+ YY[i]<-(md[i]*2.5) |
|
98 |
+ }else if(md[i]<50&md[i]>0){ |
|
99 |
+ YY[i]<-(md[i]*5) |
|
100 |
+ }else if(md[i]==0){ |
|
101 |
+ YY[i]<-45 |
|
102 |
+ } |
|
103 |
+ } |
|
104 |
+} |
|
105 |
+#text(x=bp,y=YY,labels=n,col="black") |
|
106 |
+text(x=bp,y=YY,labels=snpn,col="black",cex=1.3) |
|
107 |
+return(m) |
|
108 |
+} |
0 | 109 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,57 @@ |
1 |
+ucscannot <- |
|
2 |
+function(UCSCannot,SNPn,A=3,B=1.9,C=1.3, method=1){ |
|
3 |
+UCSCannot<-as.data.frame(UCSCannot) |
|
4 |
+function_unit<-UCSCannot$function_unit |
|
5 |
+if(is.null(function_unit)){ |
|
6 |
+stop("No function_unit found in the data") |
|
7 |
+} |
|
8 |
+genes<-UCSCannot$Symbol |
|
9 |
+if(is.null(genes)){ |
|
10 |
+stop("No Symbol found in the data") |
|
11 |
+} |
|
12 |
+ N<-length(genes) |
|
13 |
+ gene.n<-length(unique(genes)) |
|
14 |
+ |
|
15 |
+ intron<-subset(UCSCannot,function_unit=="intronic"|function_unit=="non-coding intronic") |
|
16 |
+ code<-subset(UCSCannot,function_unit=="coding") |
|
17 |
+ UTR3<-subset(UCSCannot,function_unit=="3utr") |
|
18 |
+ UTR5<-subset(UCSCannot,function_unit=="5utr") |
|
19 |
+ ncoding<-subset(UCSCannot,function_unit=="non-coding") |
|
20 |
+ upstream5<-subset(UCSCannot,function_unit=="5upstream") |
|
21 |
+ downstream3<-subset(UCSCannot,function_unit=="3downstream") |
|
22 |
+ |
|
23 |
+ intron<-length(intron[,1])/N |
|
24 |
+ coding<-length(code[,1])/N |
|
25 |
+ UTR3<-length(UTR3[,1])/N |
|
26 |
+ UTR5<-length(UTR5[,1])/N |
|
27 |
+ intergene<-length(ncoding[,1])/N |
|
28 |
+ upstream<-length(upstream5[,1])/N |
|
29 |
+ downstream<-length(downstream3[,1])/N |
|
30 |
+ |
|
31 |
+ res<-matrix(NA,1,8) |
|
32 |
+ res[1,]<-c(gene.n,coding,intron,UTR3,UTR5,intergene,upstream,downstream) |
|
33 |
+ colnames(res)<-c("genes","exons","introns","TUR3","UTR5","intergenes","upstream","downstream") |
|
34 |
+ res2<-c(coding,intron,intergene,UTR5,upstream,UTR3,downstream) |
|
35 |
+ oldcexmain<-par(cex.main=A) |
|
36 |
+ cols=c("gold2","red","magenta","blue","cornflowerblue","limegreen","mediumseagreen") |
|
37 |
+ main=paste("Distribution of",SNPn,"SNPs within",gene.n,"genes") |
|
38 |
+ |
|
39 |
+ if(method==1){ |
|
40 |
+ annot<-c("exon","intron","intergene","5'UTR","5'upstream","3'UTR","3'downstream") |
|
41 |
+ lbls <- paste(annot, "\n", round(res2*100,1),"%", sep="") |
|
42 |
+# oldcexmain<-par(cex.main=A) |
|
43 |
+ pie3D(res2,labelcex=B,labels=lbls,labelcol=par("fg"), explode=0.1,labelrad=C,main=main) |
|
44 |
+ }else if(method==2){ |
|
45 |
+ annot<-c("intron","exon","3'downstream","3'UTR","5'upstream","5'UTR","intergene") |
|
46 |
+ lbls <- paste(round(res2*100,1),"%", sep="") |
|
47 |
+ # cols=c("gold2","red","magenta","blue","cornflowerblue","limegreen","mediumseagreen") |
|
48 |
+ pie3D(res2,labelcex=(B+0.5),labels=lbls,labelcol=par("fg"), explode=0.08,labelrad=C,main=main) |
|
49 |
+ if (B>=1.8){ |
|
50 |
+ D<-1.8 |
|
51 |
+ }else{ |
|
52 |
+ D<-B} |
|
53 |
+ legend(0.5,1.05,annot,col=cols,pch=19,cex=D) |
|
54 |
+ } |
|
55 |
+ par(oldcexmain) |
|
56 |
+ return(res) |
|
57 |
+ } |
10 | 68 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
1 |
+\name{GMRP-package} |
|
2 |
+\alias{GMRP-package} |
|
3 |
+\alias{GMRP} |
|
4 |
+\docType{package} |
|
5 |
+\title{ |
|
6 |
+\bold{GWAS}-based Mendelian Randomization Path Analysis |
|
7 |
+} |
|
8 |
+\description{ |
|
9 |
+\bold{GMRP} is used to perform Mendelian randomization analysis of causal variables on disease of study using SNP beta data from \bold{GWAS} or \bold{GWAS} meta analysis and furthermore execute path analysis of causal variables onto the disease. |
|
10 |
+} |
|
11 |
+\details{ |
|
12 |
+\bold{GMRP} can perform analyses of Mendelian randomization (MR),correlation, path of causal variables onto disease of interest and SNP annotation summarization analysis. MR includes SNP selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta values, \bold{GMRP} performs correlation and path analyses to construct path diagrams of causal variables to the disease. \bold{GMRP} consists of 8 functions: \emph{chrp, fmerg, mktable, pathdiagram2, pathdiagram, path, snpposit, ucscannot} and 5 datasets: \code{beta.data, cad.data, lpd.data, SNP358.data and SNP368_annot.data}. Function \emph{chrp} is used to separate string vector hg19 into two numeric vectors: chromosome number and SNP position on chromosomes. Function \emph{fmerg} is used to merge two datasets into one dataset. Function \emph{mktable} performs SNP selection and creates a standard beta table for function path to do path analyses. Function \emph{pathdiagram} is used to create a path diagram of causal variables onto the disease or onto outcome. Function \emph{pathdiagram2} can merg two-level pathdiagrams into one nested pathdiagram where inner path diagram is a path diagram of causal variables contributing onto outcome and the outside path diagram is a diagram of path of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \code{lpd.data} and \code{cad.data} provide an example to create a standard beta dataset for \emph{path} function to do path analysis and SNP data for SNP annotation analysis by performing \emph{mktable} and \emph{fmerg}. beta.data are a standard beta dataset for path analysis. \code{SNP358.data} provide an example for function \emph{snpposit} to do SNP position annotation analysis and \code{SNP368_annot.data} are for function \emph{ucscannot} to do SNP function annotation analysis. It is specially emphasized that except for that making standard beta table using \emph{mktable} must be done in Unix/Linux system,GMRP can be performed in Windows or Mac OS. This is because GWAS datasets usually are very huge but standard beta table is small. If users'Unix/Linux system has X11 or the other graphics system, then user should perform GMRP in Unix/Linux system, otherwise, user should transfer a standard beta table to a local computer and run GMRP in it. |
|
13 |
+ |
|
14 |
+} |
|
15 |
+\author{ |
|
16 |
+Yuan-De Tan |
|
17 |
+\email{tanyuande@gmail.com} |
|
18 |
+ |
|
19 |
+\\Dajiang Liu |
|
20 |
+ |
|
21 |
+\\Maintainer: Yuan-De Tan |
|
22 |
+} |
|
23 |
+\references{ |
|
24 |
+Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. Nat Genet 45: 1345-1352. |
|
25 |
+ |
|
26 |
+\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177. |
|
27 |
+ |
|
28 |
+\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. Methods Mol Biol 713: 153-166.\\Wright, S. 1921. Correlation and causation. J. Agricultural Research 20: 557-585. |
|
29 |
+ |
|
30 |
+\\Wright, S. 1934. The method of path coefficients Annals of Mathematical Statistics 5 (3): 161-215 5: 161-215. |
|
31 |
+ |
|
32 |
+} |
|
33 |
+\keyword{ package } |
|
34 |
+\seealso{ |
|
35 |
+ \code{\link{path}}, \code{\link{mktable}}, \code{\link{pathdiagram}},\code{\link{pathdiagram2}}, \code{\link[diagram]{plotmat}}, \code{\link[diagram]{plotweb}} |
|
36 |
+ } |
|
37 |
+ |
|
38 |
+\examples{ |
|
39 |
+ |
|
40 |
+data(beta.data) |
|
41 |
+mybeta<-DataFrame(beta.data) |
|
42 |
+CAD<-beta.data$cad |
|
43 |
+LDL<-beta.data$ldl |
|
44 |
+HDL<-beta.data$hdl |
|
45 |
+TG<-beta.data$tg |
|
46 |
+TC<-beta.data$tc |
|
47 |
+#par(mfrow=c(2,2)) |
|
48 |
+plot(LDL,CAD,pch=19,col="blue",xlab="beta of SNPs on LDL",ylab="beta of SNP on CAD", |
|
49 |
+ main="A",cex.lab=1.5,cex.axis=1.5,cex.main=2) |
|
50 |
+abline(lm(CAD~LDL),col="red",lwd=2) |
|
51 |
+plot(HDL,CAD,pch=19,col="darkgreen",xlab="beta of SNPs on HDL",ylab="beta of SNP on |
|
52 |
+ CAD", main="B",cex.lab=1.5,cex.axis=1.5,cex.main=2) |
|
53 |
+abline(lm(CAD~HDL),col="red",lwd=2) |
|
54 |
+plot(TG,CAD,pch=19,col=colors()[96],xlab="beta of SNPs on TG",ylab="beta of SNP on |
|
55 |
+ CAD",main="C",cex.lab=1.5,cex.axis=1.5,cex.main=2) |
|
56 |
+abline(lm(CAD~TG),col="red",lwd=2) |
|
57 |
+plot(TC,CAD,pch=19,col=colors()[123],xlab="beta of SNPs on TC",ylab="beta of SNP on |
|
58 |
+ CAD",main="D",cex.lab=1.5,cex.axis=1.5,cex.main=2) |
|
59 |
+abline(lm(CAD~TC),col="red",lwd=2) |
|
60 |
+ |
|
61 |
+mod<-cad~ldl+hdl+tg+tc |
|
62 |
+pathvalue<-path(betav=mybeta,model=mod,outcome="cad") |
|
63 |
+ |
|
64 |
+mypath<-matrix(NA,3,4) |
|
65 |
+mypath[1,]<-c(1.000000,-0.066678, 0.420036,0.764638) |
|
66 |
+mypath[2,]<-c(-0.066678,1.000000,-0.559718,0.496831) |
|
67 |
+mypath[3,]<-c(0.420036,-0.559718,1.000000,0.414346) |
|
68 |
+colnames(mypath)<-c("ldl","hdl","tg","path") |
|
69 |
+mypath<-DataFrame(mypath) |
|
70 |
+#mypath |
|
71 |
+#DataFrame with 3 rows and 4 columns |
|
72 |
+# ldl hdl tg path |
|
73 |
+# <numeric> <numeric> <numeric> <numeric> |
|
74 |
+#1 1.000000 -0.066678 0.420036 0.764638 |
|
75 |
+#2 -0.066678 1.000000 -0.559718 0.496831 |
|
76 |
+#3 0.420036 -0.559718 1.000000 0.414346 |
|
77 |
+ |
|
78 |
+#> pathdiagram(pathdata=mypath,disease="cad",R2=0.988243,range=c(1:3)) |
|
79 |
+#Loading required package: shape |
|
80 |
+#Error in pathcad$path : $ operator is invalid for atomic vectors |
|
81 |
+mypath<-as.data.frame(mypath) |
|
82 |
+pathdiagram(pathdata=mypath,disease="cad",R2=0.988243,range=c(1:3)) |
|
83 |
+ |
|
84 |
+} |
0 | 85 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+\name{SNP358.data} |
|
2 |
+\alias{SNP358.data} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+Data of 358 SNPs |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+\code{SNP358.data} were obtained from \bold{GWAS} Meta-analyzed datasets of lipoprotein cholesterols and coronary artery disease. The data contain three numeric vectors (columns): SNPID(\verb{rsid}), chromosome number (\verb{chr}) and SNP position on chromosome (posit). |
|
9 |
+} |
|
10 |
+\usage{data("SNP358.data")} |
|
11 |
+\format{ |
|
12 |
+ A data frame with 358 observations on the following 3 variables. |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{rsid}}{a character vector} |
|
15 |
+ \item{\code{chr}}{a numeric vector} |
|
16 |
+ \item{\code{posit}}{a numeric vector} |
|
17 |
+ } |
|
18 |
+} |
|
19 |
+\details{ |
|
20 |
+These 358 SNPs were chosen by using mktable with Pv = code{5x10e-08},\code{Pc = Pd =0.979} from code{lpd.data and cad.data}. They provide a data example for how to perform annotation analysis of SNP positions. |
|
21 |
+} |
|
22 |
+\value{ |
|
23 |
+A set of data with 358 rows(SNPs) and 3 columns(SNP ID, chromosome # and SNP position on chromosomes). |
|
24 |
+} |
|
25 |
+ |
|
26 |
+\examples{ |
|
27 |
+data(SNP358.data) |
|
28 |
+## maybe str(SNP358.data) ; plot(SNP358.data) ... |
|
29 |
+} |
|
30 |
+\keyword{datasets} |
0 | 31 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,38 @@ |
1 |
+\name{SNP368annot.data} |
|
2 |
+\alias{SNP368annot.data} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+Annotation data of \verb{368SNPs} |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+The annotation data of 368SNPs are used to construct SNP distribution in gene elements (coding region, introne,UTR, etc). The data contain 12 vectors or variables but only Symbol and function_unit are used by ucscannot.R to build SNP distribution in gene elements. |
|
9 |
+} |
|
10 |
+\usage{data("SNP368annot.data")} |
|
11 |
+\format{ |
|
12 |
+ A data frame with 1053 observations on the following 6 variables. |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{SNP}}{a string vector} |
|
15 |
+ \item{\code{Allele}}{a string vector} |
|
16 |
+ \item{\code{Strand}}{a numeric vector} |
|
17 |
+ \item{\code{Symbol}}{a string vector} |
|
18 |
+ \item{\code{Gene}}{a string vector} |
|
19 |
+ \item{\code{function_unit}}{a string vector} |
|
20 |
+ } |
|
21 |
+} |
|
22 |
+\details{ |
|
23 |
+\code{SNP368annot.data} were obtained by performing mktable with \code{PV=5x10e-08},\code{Pc=Pd=0.979} on lpd.data and cad.data and SNP tools. \code{SNP368annot.data} provides an practical example for constructing distribution of SNPs in gene elements. Note that \verb{function_units} are gene elements. |
|
24 |
+} |
|
25 |
+\value{ |
|
26 |
+A dataset with 1053 rows and 6 columns for results of SNP annotation analysis. See format above. |
|
27 |
+} |
|
28 |
+\source{ |
|
29 |
+http://csg.sph.umich.edu//abecasis/public/lipids2013/ |
|
30 |
+ |
|
31 |
+} |
|
32 |
+\references{ |
|
33 |
+http://snp-nexus.org/test/snpnexus_19427/ |
|
34 |
+} |
|
35 |
+\examples{ |
|
36 |
+data(SNP368annot.data) |
|
37 |
+} |
|
38 |
+\keyword{datasets} |
0 | 39 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
1 |
+\name{beta.data} |
|
2 |
+\alias{beta.data} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+Beta Data Of SNP Regressed on Causal Variables and Disease |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+Beta data are a matrix dataset consisting of 5 columns: cad, ldl, hdl, tg, and tc with 368 rows. |
|
9 |
+} |
|
10 |
+\usage{data("beta.data")} |
|
11 |
+\format{ |
|
12 |
+ A data frame with 368 observations on the following 5 variables. |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{cad}}{a numeric vector} |
|
15 |
+ \item{\code{ldl}}{a numeric vector} |
|
16 |
+ \item{\code{hdl}}{a numeric vector} |
|
17 |
+ \item{\code{tg}}{a numeric vector} |
|
18 |
+ \item{\code{tc}}{a numeric vector} |
|
19 |
+ } |
|
20 |
+} |
|
21 |
+\details{ |
|
22 |
+Beta data are a matrix consisting of regression coefficients of 368 SNPs on cad, ldl, hdl, tg, tc where \verb{cad} is coronary artery disease, \verb{ldl} is low-density lipoprotein cholesterol, \verb{hdl}, high-density lipoprotein cholesterol, \verb{tg},triglycerides and \verb{tc}, total cholesterol in plasma. These 368 SNPs were obtained by using \verb{mktable} from \bold{GWAS} meta-analyzed data. |
|
23 |
+} |
|
24 |
+\value{ |
|
25 |
+A set of real regression coefficients of 368 SNPs on disease and causal variables. |
|
26 |
+} |
|
27 |
+\source{ |
|
28 |
+http://csg.sph.umich.edu//abecasis/public/lipids2013/ |
|
29 |
+ |
|
30 |
+\\http://www.cardiogramplusc4d.org/downloads/ |
|
31 |
+ |
|
32 |
+} |
|
33 |
+\references{ |
|
34 |
+Willer CJ et al. Discovery and refinement of loci associated with lipid levels. Nat. Genet. 2013. doi:10.1038/ng.2797. |
|
35 |
+ |
|
36 |
+\\Schunkert, H. et al. 2011. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. Nat Genet 43: 333-338.[online] |
|
37 |
+ |
|
38 |
+\\Schunkert H, Konig IR, Kathiresan S, Reilly MP, Assimes TL, Holm H et al. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. Nat Genet. 2011 43: 333-338. |
|
39 |
+} |
|
40 |
+\examples{ |
|
41 |
+data(beta.data) |
|
42 |
+## maybe str(beta.data) ; plot(beta.data) ... |
|
43 |
+} |
|
44 |
+\keyword{datasets} |
0 | 45 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
1 |
+\name{cad.data} |
|
2 |
+\alias{cad.data} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+bold{GWAS} Meta-analyzed Data of Coronary Artery Disease |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+\code{cad.data} are a matrix dataset consisting of 12 variables such as \verb{SNPID}, \verb{SNP} position on chromosomes, allele and alternative allele, allelic frequencies and 1069 \verb{SNP}s. |
|
9 |
+} |
|
10 |
+\usage{data("cad.data")} |
|
11 |
+\format{ |
|
12 |
+ A data frame with 1609 observations on the following 12 variables. |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{SNP}}{a character vector} |
|
15 |
+ \item{\code{chr_pos_b36}}{a character vector} |
|
16 |
+ \item{\code{reference_allele}}{a character vector} |
|
17 |
+ \item{\code{other_allele}}{a character vector} |
|
18 |
+ \item{\code{ref_allele_frequency}}{a numeric vector} |
|
19 |
+ \item{\code{pvalue}}{a numeric vector} |
|
20 |
+ \item{\code{het_pvalue}}{a numeric vector} |
|
21 |
+ \item{\code{log_odds}}{a numeric vector} |
|
22 |
+ \item{\code{log_odds_se}}{a numeric vector} |
|
23 |
+ \item{\code{N_case}}{a numeric vector} |
|
24 |
+ \item{\code{N_control}}{a numeric vector} |
|
25 |
+ \item{\code{model}}{a character vector} |
|
26 |
+ } |
|
27 |
+} |
|
28 |
+\details{ |
|
29 |
+\code{cad.data}, also called \verb{CARDIoGRAM} \bold{GWAS}, are a meta-analyzed GWAS data from \bold{GWAS} studies of European descent imputed to \bold{HapMap2} involving 22,233 cases and 64,762 controls. The data were downloaded from the following website. |
|
30 |
+} |
|
31 |
+\value{ |
|
32 |
+A data sheet consisting of 1609 rows(SNPs) and 12 columns(character vectors such as \verb{SNPID} and allele, numeric vector such as allele frequency and beta coefficient. See data format above). |
|
33 |
+} |
|
34 |
+\source{ |
|
35 |
+http://www.cardiogramplusc4d.org/downloads/ |
|
36 |
+} |
|
37 |
+\references{ |
|
38 |
+Schunkert H, Konig IR, Kathiresan S, Reilly MP, Assimes TL, Holm H et al. Large-scale association analysis identifies 13 new susceptibility loci for coronary artery disease. \bold{Nat Genet.} 2011 \bold{43}: 333-338. |
|
39 |
+} |
|
40 |
+\examples{ |
|
41 |
+data(cad.data) |
|
42 |
+## maybe str(cad.data) ; plot(cad.data) ... |
|
43 |
+} |
|
44 |
+\keyword{datasets} |
0 | 45 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,47 @@ |
1 |
+\name{chrp} |
|
2 |
+\alias{chrp} |
|
3 |
+\title{ |
|
4 |
+Split hg19 |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+Split string hg19 into two numeric columns: chr and posit. |
|
8 |
+} |
|
9 |
+\usage{ |
|
10 |
+chrp(hg) |
|
11 |
+} |
|
12 |
+ |
|
13 |
+\arguments{ |
|
14 |
+ \item{hg}{ |
|
15 |
+character vector represented with code{chr##.#######} where chr## is chromosome number and \code{.#######} is SNP site (bp). |
|
16 |
+} |
|
17 |
+} |
|
18 |
+\details{ |
|
19 |
+chrp can convert \code{chr##.#######} into two numeric columns: chr(chromosome number) and posit(SNP position) |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+Return two numeric vectors: chrosomome number and SNP position |
|
23 |
+} |
|
24 |
+\note{ |
|
25 |
+If there is chrX.######### in the data sheet, then user should change chrX.######## to chr23.#######. |
|
26 |
+} |
|
27 |
+\author{ |
|
28 |
+Yuan-De Tan |
|
29 |
+\email{tanyuande@gmail.com} |
|
30 |
+ |
|
31 |
+\\Dajiang Liu |
|
32 |
+} |
|
33 |
+\note{ |
|
34 |
+hg may also be hg18. User can also use packages GenomicRanges to retrieve chromosome # and SNP position. |
|
35 |
+} |
|
36 |
+ |
|
37 |
+\seealso{ |
|
38 |
+ \code{\link{mktable}},\code{link{GenomicRanges}[Granges]},\code{link{GenomicRanges}[IRangs]},\code{link{GenomicRanges}[DataFrame]} |
|
39 |
+} |
|
40 |
+\examples{ |
|
41 |
+data(lpd.data) |
|
42 |
+lpd<-lpd.data |
|
43 |
+hg19<-lpd$SNP_hg19.HDL |
|
44 |
+chr<-chrp(hg=hg19) |
|
45 |
+} |
|
46 |
+\keyword{chromosome} |
|
47 |
+\keyword{SNP} |
0 | 48 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,83 @@ |
1 |
+\name{fmerg} |
|
2 |
+\alias{fmerg} |
|
3 |
+\title{ |
|
4 |
+Merge Two \bold{GWAS} Result Data Sheets |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+\emph{fmerg} can be used to merge two \bold{GWAS} result data sheets with the same key \verb{ID(SNP ID)} into one data sheet. |
|
8 |
+} |
|
9 |
+\usage{ |
|
10 |
+fmerg(fl1, fl2,ID1, ID2, A, B, method) |
|
11 |
+} |
|
12 |
+\arguments{ |
|
13 |
+ \item{fl1}{ |
|
14 |
+R object: data file 1 |
|
15 |
+} |
|
16 |
+ \item{fl2}{ |
|
17 |
+R object: data file 2 |
|
18 |
+} |
|
19 |
+ \item{ID1}{ |
|
20 |
+key id (SNP ID such as rsid) in file 1 |
|
21 |
+} |
|
22 |
+ \item{ID2}{ |
|
23 |
+key id (SNP ID such as rsid) in file 2 |
|
24 |
+} |
|
25 |
+ \item{A}{ |
|
26 |
+postfix for file 1: A=".W1". W1 may be any identifier in file 1. Default is A="". |
|
27 |
+} |
|
28 |
+ \item{B}{ |
|
29 |
+postfix for file 2: B=".W2". W2 may be any identifier in file2. Default is B="". |
|
30 |
+} |
|
31 |
+ \item{method}{ |
|
32 |
+method for merging. See details. |
|
33 |
+} |
|
34 |
+} |
|
35 |
+\details{ |
|
36 |
+fl1 and fl2 are two \bold{GWAS} result data files from different studies or with different risk variables. They contain \verb{SNPID}, \verb{hg18}, \verb{hg19}(positions), beta values, allele, frequency, and so on. The method has four options: method="No","NO" or "no" means that all data with unmatch \verb{SNP}s are not saved in the merged file; method="All","ALL" or "all" lets fmerg save all the data with unmatched \verb{SNP}s from two files but they are not paired one-by-one. This is different from R \emph{merge} function. method="file1" will save the data with unmatched \verb{SNP}s only from file 1 in the merged file and method="file2" allows function \emph{fmerg} to save the data with unmatched \verb{SNP}s from file2 in the merged file. |
|
37 |
+ |
|
38 |
+} |
|
39 |
+\value{ |
|
40 |
+Return a joined data sheet. |
|
41 |
+} |
|
42 |
+ |
|
43 |
+\author{ |
|
44 |
+Yuan-De Tan |
|
45 |
+\email{tanyuande@gmail.com} |
|
46 |
+ |
|
47 |
+\\Dajiang Liu |
|
48 |
+} |
|
49 |
+\note{ |
|
50 |
+Function fmerg can also be applied to the other types of data. |
|
51 |
+} |
|
52 |
+ |
|
53 |
+\seealso{ |
|
54 |
+ \code{\link[base]{merge}} |
|
55 |
+} |
|
56 |
+\examples{ |
|
57 |
+data1<-matrix(NA,20,4) |
|
58 |
+data2<-matrix(NA,30,7) |
|
59 |
+SNPID1<-paste("rs",seq(1:20),sep="") |
|
60 |
+SNPID2<-paste("rs",seq(1:30),sep="") |
|
61 |
+data1[,1:4]<-c(round(runif(20),4),round(runif(20),4),round(runif(20),4),round(runif(20),4)) |
|
62 |
+data2[,1:4]<-c(round(runif(30),4),round(runif(30),4),round(runif(30),4),round(runif(30),4)) |
|
63 |
+data2[,5:7]<-c(round(seq(30)*runif(30),4),round(seq(30)*runif(30),4),seq(30)) |
|
64 |
+data1<-cbind(SNPID1,as.data.frame(data1)) |
|
65 |
+data2<-cbind(SNPID2,as.data.frame(data2)) |
|
66 |
+dim(data1) |
|
67 |
+dim(data2) |
|
68 |
+colnames(data1)<-c("SNP","var1","var2","var3","var4") |
|
69 |
+colnames(data2)<-c("SNP","var1","var2","var3","var4","V1","V2","V3") |
|
70 |
+data12<-fmerg(fl1=data1,fl2=data2,ID1="SNP",ID2="SNP",A=".dat1",B=".dat2",method="No") |
|
71 |
+#data12[1:3,] |
|
72 |
+# SNP.dat1 var1.dat1 var2.dat1 var3.dat1 var4.dat1 SNP.dat2 var1.dat2 var2.dat2 |
|
73 |
+#1 rs1 0.9152 0.9853 0.9879 0.9677 rs1 0.5041 0.5734 |
|
74 |
+#2 rs10 0.3357 0.116 0.3408 0.1867 rs10 0.9147 0.9294 |
|
75 |
+#3 rs11 0.8004 0.8856 0.2236 0.4642 rs11 0.9262 0.5831 |
|
76 |
+# var3.dat2 var4.dat2 V1 V2 V3 |
|
77 |
+#1 0.4933 0.6766 0.1864 0.6836 1 |
|
78 |
+#2 0.4104 0.1327 3.2192 1.4166 10 |
|
79 |
+#3 0.8541 0.6228 1.1803 1.9044 11 |
|
80 |
+ |
|
81 |
+} |
|
82 |
+\keyword{data} |
|
83 |
+\keyword{merge} |
0 | 84 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,72 @@ |
1 |
+\name{lpd.data} |
|
2 |
+\alias{lpd.data} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+\bold{GWAS} Meta-analyzed Data of Lipoprotein Cholesterols |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+lpd.data are standard \bold{GWAS} Meta-analyzed dataset of lipoprotein cholesterols. It was constructed by merging four datasets: \code{Mc_HDL.txt,Mc_LDL.txt, Mc_TC.txt and Mc_TG.txt}. |
|
9 |
+} |
|
10 |
+\usage{data("lpd.data")} |
|
11 |
+\format{ |
|
12 |
+ A data frame with 1609 observations on the following 40 variables. |
|
13 |
+ \describe{ |
|
14 |
+ \item{\code{SNP_hg18.HDL}}{a character vector} |
|
15 |
+ \item{\code{SNP_hg19.HDL}}{a character vector} |
|
16 |
+ \item{\code{rsid.HDL}}{a character vector} |
|
17 |
+ \item{\code{A1.HDL}}{a character vector} |
|
18 |
+ \item{\code{A2.HDL}}{a character vector} |
|
19 |
+ \item{\code{beta.HDL}}{a numeric vector} |
|
20 |
+ \item{\code{se.HDL}}{a numeric vector} |
|
21 |
+ \item{\code{N.HDL}}{a numeric vector} |
|
22 |
+ \item{\code{P.value.HDL}}{a numeric vector} |
|
23 |
+ \item{\code{Freq.A1.1000G.EUR.HDL}}{a numeric vector} |
|
24 |
+ \item{\code{SNP_hg18.LDL}}{a character vector} |
|
25 |
+ \item{\code{SNP_hg19.LDL}}{a character vector} |
|
26 |
+ \item{\code{rsid.LDL}}{a character vector} |
|
27 |
+ \item{\code{A1.LDL}}{a character vector} |
|
28 |
+ \item{\code{A2.LDL}}{a character vector} |
|
29 |
+ \item{\code{beta.LDL}}{a numeric vector} |
|
30 |
+ \item{\code{se.LDL}}{a numeric vector} |
|
31 |
+ \item{\code{N.LDL}}{a numeric vector} |
|
32 |
+ \item{\code{P.value.LDL}}{a numeric vector} |
|
33 |
+ \item{\code{Freq.A1.1000G.EUR.LDL}}{a numeric vector} |
|
34 |
+ \item{\code{SNP_hg18.TG}}{a character vector} |
|
35 |
+ \item{\code{SNP_hg19.TG}}{a character vector} |
|
36 |
+ \item{\code{rsid.TG}}{a character vector} |
|
37 |
+ \item{\code{A1.TG}}{a character vector} |
|
38 |
+ \item{\code{A2.TG}}{a character vector} |
|
39 |
+ \item{\code{beta.TG}}{a numeric vector} |
|
40 |
+ \item{\code{se.TG}}{a numeric vector} |
|
41 |
+ \item{\code{N.TG}}{a numeric vector} |
|
42 |
+ \item{\code{P.value.TG}}{a numeric vector} |
|
43 |
+ \item{\code{Freq.A1.1000G.EUR.TG}}{a numeric vector} |
|
44 |
+ \item{\code{SNP_hg18.TC}}{a character vector} |
|
45 |
+ \item{\code{SNP_hg19.TC}}{a character vector} |
|
46 |
+ \item{\code{rsid.TC}}{a character vector} |
|
47 |
+ \item{\code{A1.TC}}{a character vector} |
|
48 |
+ \item{\code{A2.TC}}{a character vector} |
|
49 |
+ \item{\code{beta.TC}}{a numeric vector} |
|
50 |
+ \item{\code{se.TC}}{a numeric vector} |
|
51 |
+ \item{\code{N.TC}}{a numeric vector} |
|
52 |
+ \item{\code{P.value.TC}}{a numeric vector} |
|
53 |
+ \item{\code{Freq.A1.1000G.EUR.TC}}{a numeric vector} |
|
54 |
+ } |
|
55 |
+} |
|
56 |
+\details{ |
|
57 |
+These four \bold{GWAS} Meta-analyzed data of lipoprotein cholesterols were downloaded from the following website. |
|
58 |
+} |
|
59 |
+\value{ |
|
60 |
+A data sheet consisting of 1609 rows (SNPs) and 40 columns(character vectors such as SNPID and allele, numeric vector such as allele frequency, beta coefficient. See data format above). |
|
61 |
+} |
|
62 |
+\source{ |
|
63 |
+http://csg.sph.umich.edu//abecasis/public/lipids2013/ |
|
64 |
+} |
|
65 |
+\references{ |
|
66 |
+Willer CJ et al. Discovery and refinement of loci associated with lipid levels. \emph{Nat. Genet.} 2013. doi:10.1038/ng.2797. |
|
67 |
+} |
|
68 |
+\examples{ |
|
69 |
+data(lpd.data) |
|
70 |
+## maybe str(lpd.data) ; plot(lpd.data) ... |
|
71 |
+} |
|
72 |
+\keyword{datasets} |
0 | 73 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,176 @@ |
1 |
+\name{mktable} |
|
2 |
+\alias{mktable} |
|
3 |
+\title{ |
|
4 |
+Selection of SNPs and Creation of A Standard Table for Mendelian Randomization and Path Analyses |
|
5 |
+} |
|
6 |
+\description{ |
|
7 |
+mktable is used to choose SNPs with \code{LG, Pv, Pc} and \code{Pd} and create a standard \verb{SNP} beta table for \code{Mendelian randomization} and \code{path analysis}, see details. |
|
8 |
+} |
|
9 |
+\usage{ |
|
10 |
+mktable(cdata, ddata,rt, varname, LG, Pv, Pc, Pd) |
|
11 |
+} |
|
12 |
+\arguments{ |
|
13 |
+ \item{cdata}{ |
|
14 |
+causal variable \bold{GWAS} data or \bold{GWAS} meta-analysed data containing \verb{SNP} \code{ID}, \verb{SNP} position, chromosome, allele, allelic frequency, beta value, \code{sd}, sample size, etc. |
|
15 |
+} |
|
16 |
+ \item{ddata}{ |
|
17 |
+disease \bold{GWAS} data or \bold{GWAS} meta-analysed data containing \verb{SNP} \code{ID}, \verb{SNP} position, chromosome, allele, allelic frequency, beta value, \code{sd}, sample size, etc. |
|
18 |
+} |
|
19 |
+ \item{rt}{ |
|
20 |
+a string that specifies type of returning table. It has two options: \code{rt="beta"} returns beta table or \code{rt="path"} returns \verb{SNP} direct path coefficient table. Default is "beta". |
|
21 |
+} |
|
22 |
+ \item{varname}{ |
|
23 |
+a required string set that lists names of undefined causal variables for Mendelian randomization and path analyses. The first name is disease name. Here an example given is \code{varname <-c("CAD","LDL","HD","TG","TC")}. |
|
24 |
+} |
|
25 |
+ \item{LG}{ |
|
26 |
+a numeric parameter. \code{LG} is a given minimum interval distnce between \verb{SNP}s and used to choose \verb{SNP}s with. Default \code{LG=1} |
|
27 |
+} |
|
28 |
+ \item{Pv}{ |
|
29 |
+a numeric parameter. \code{Pv} is a given maximum p-value that is used to choose \verb{SNP}s. Default Pv=5e-8 |
|
30 |
+} |
|
31 |
+ \item{Pc}{ |
|
32 |
+a numeric parameter. \code{Pc} is a given proportion of sample size to maximum sample size in causal variable data and used to choose \verb{SNP}s. Default \code{Pc=0.979} |
|
33 |
+} |
|
34 |
+ \item{Pd}{ |
|
35 |
+a numeric parameter. Pd is a given proportion of sample size to the maximum sample size in disease data and used to choose \verb{SNP}s. Default \code{Pd =0.979}. |
|
36 |
+} |
|
37 |
+} |
|
38 |
+\details{ |
|
39 |
+The standard \bold{GWAS} cdata set should have the format with following columns: chrn, posit, rsid, \code{a1.x1, a1.x2}, \dots, \code{a1.xn}, \code{freq.x1, freq.x2}, \dots, \code{freq.xn}, \code{beta.x1, beta.x2}, \dots, \code{beta.xn}, \code{sd.x1, sd.x2}, \dots, \code{sd.xn}, \code{pvj}, \code{N.x1, N.x2}, \dots, \code{N.xn}, \code{pcj}. The standard \bold{GWAS} \emph{ddata} set should have\code{hg.d}, \code{SNP.d},\code{a1.d}, \code{freq.d}, \code{beta.d}, \code{N.case},\code{N.ctr},\code{freq.case} where \code{x1, x2}, \dots, \code{xn} are causal variables. See example. |
|
40 |
+\describe{ |
|
41 |
+\item{beta}{ is a numeric vector that is a column of beta values for regression of SNPs on variable vector \code{X={x1, x2, \dots, xn}}. |
|
42 |
+} |
|
43 |
+\item{freq}{ is a numeric vector that is a column of frequencies of allele 1 with respect to variable vector \code{X={x1, x2, \dots, xn}}. |
|
44 |
+} |
|
45 |
+\item{sd}{is a numeric vector that is a column of standard deviations of variable \code{x1,x2}, \dots, \code{xn} specific to \verb{SNP}. Note that here sd is not beta standard deviation. If sd is not specifical to \verb{SNP}s, then sd.xi has the same value for all SNPs in variable \code{i}. |
|
46 |
+ } |
|
47 |
+\item{d}{denotes disease.} |
|
48 |
+\item{N}{is sample size.} |
|
49 |
+\item{freq.case}{is frequency of disease.} |
|
50 |
+\item{chrn}{is a numeric vector for chromosome #.} |
|
51 |
+\item{posit}{is a numeric vector for \verb{SNP} positions on chromosome #. Some time, \verb{chrn} and posit are combined into string vector: \code{hg19/hg18}. |
|
52 |
+ } |
|
53 |
+\item{pvj}{is defined as p-value, \code{pcj} and \code{pdj} as proportions of sample size for \verb{SNP} \code{j} to the maximum sample size in the causal variable data and in disease data, respectively. |
|
54 |
+ } |
|
55 |
+ } |
|
56 |
+} |
|
57 |
+\value{ |
|
58 |
+Return a standard \verb{SNP} beta or \verb{SNP} path table containing \code{m} \verb{SNP}s chosen with \code{LG, Pv, Pc and Pd} and \code{n} variables and disease for \code{Mendelian randomization} and \code{path analysis}. |
|
59 |
+} |
|
60 |
+\references{ |
|
61 |
+Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. \emph{Nat Genet} \bold{45}: 1345-1352. |
|
62 |
+ |
|
63 |
+\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. \emph{PLoS Med} \bold{5}: e177. |
|
64 |
+ |
|
65 |
+\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166. |
|
66 |
+ |
|
67 |
+\\Willer, C.J. Schmidt, E.M. Sengupta, S. Peloso, G.M. Gustafsson, S. Kanoni, S. Ganna, A. Chen, J.,Buchkovich, M.L. Mora, S. et al (2013) Discovery and refinement of loci associated with lipid levels. \emph{Nat Genet} \bold{45}: 1274-1283. |
|
68 |
+} |
|