... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
Package: GMRP |
2 | 2 |
Type: Package |
3 | 3 |
Title: GWAS-based Mendelian Randomization and Path Analyses |
4 |
-Version: 1.7.1 |
|
5 |
-Date: 2015-11-04 |
|
6 |
-Author: Yuan-De Tan and Dajiang Liu |
|
4 |
+Version: 1.7.2 |
|
5 |
+Date: 2018-04-15 |
|
6 |
+Author: Yuan-De Tan |
|
7 | 7 |
Maintainer: Yuan-De Tan <tanyuande@gmail.com> |
8 | 8 |
Description: Perform Mendelian randomization analysis of multiple SNPs |
9 | 9 |
to determine risk factors causing disease of study and to |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
exportPattern("^[[:alpha:]]+") |
2 |
-export(fmerg) |
|
2 |
+export(fmerge) |
|
3 | 3 |
export(chrp) |
4 | 4 |
export(mktable) |
5 | 5 |
export(path) |
6 | 6 |
export(pathdiagram) |
7 | 7 |
export(pathdiagram2) |
8 |
-export(snpposit) |
|
8 |
+export(snpPositAnnot) |
|
9 | 9 |
export(ucscannot) |
10 | 10 |
import(graphics) |
11 | 11 |
import(utils) |
... | ... |
@@ -29,7 +29,7 @@ pdj<-ND/max(ND) |
29 | 29 |
newcdat<-as.data.frame(newcdat) |
30 | 30 |
|
31 | 31 |
ddata<-cbind(ddata[idx,],pdj) |
32 |
-newcd<-fmerg(fl1=newcdat,fl2=ddata,ID1="rsid",ID2="SNP.d",A="",B="",method="no") |
|
32 |
+newcd<-fmerge(fl1=newcdat,fl2=ddata,ID1="rsid",ID2="SNP.d",A="",B="",method="no") |
|
33 | 33 |
sbnewdat<-subset(newcd,pdj>=Pd) |
34 | 34 |
sbnewdat<-as.data.frame(sbnewdat) |
35 | 35 |
|
36 | 36 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,105 @@ |
1 |
+snpPositAnnot<-function(SNPdata,SNP_hg19="chr",main){ |
|
2 |
+#This function provide mean length of interval between SNP positions on each chromosome |
|
3 |
+#and number of SNPs on each chromosome and ouput numbers of SNPs with interval length>=LG. |
|
4 |
+#The data contain hg19 or chr and position. If SNP_hg19 == chr, then data must contain chr column and posit column |
|
5 |
+#if SNP_hg19 != chr, then SNP_hg19 must be hg19 or hg18 that contain a column of "chrx:xxxxxxx". |
|
6 |
+ |
|
7 |
+if(SNP_hg19!="chr"){ |
|
8 |
+hg19<-as.character(SNPdata$SNP_hg19) |
|
9 |
+#print(hg19[1:10]) |
|
10 |
+chromp<-chrp(hg19) |
|
11 |
+#print(chromp[1:6,]) |
|
12 |
+}else{ |
|
13 |
+chr<-SNPdata$chr |
|
14 |
+if(length(chr)==0){ |
|
15 |
+stop("no data") |
|
16 |
+} |
|
17 |
+SNPdata<-SNPdata[order(chr),] |
|
18 |
+#print(SNPdata[1:3,]) |
|
19 |
+chrn<-as.numeric(SNPdata$chr) |
|
20 |
+posit<-as.numeric(SNPdata$posit) |
|
21 |
+chromp<-cbind(chrn,posit) |
|
22 |
+ |
|
23 |
+#print(CHR) |
|
24 |
+} |
|
25 |
+chr<-chromp[,1] |
|
26 |
+CHR<-unique(chr) |
|
27 |
+nch<-length(CHR) # number of chromosomes |
|
28 |
+ |
|
29 |
+#print(nch) |
|
30 |
+ |
|
31 |
+meanl<-m<-md<-rep(NA,nch) |
|
32 |
+ |
|
33 |
+mdist<-function(x){ |
|
34 |
+ |
|
35 |
+y<-as.numeric(x[,2]) |
|
36 |
+#print(y) |
|
37 |
+nr<-length(y) |
|
38 |
+y<-sort(y) |
|
39 |
+if(nr==0){ |
|
40 |
+lgdist<-0 |
|
41 |
+}else if(nr==1){ |
|
42 |
+lgdist<-log(y[1]) |
|
43 |
+ }else{ |
|
44 |
+dist<-rep(NA,nr-1) |
|
45 |
+for(i in 1:nr-1){ |
|
46 |
+dist[i]<-(y[(i+1)]-y[i]) |
|
47 |
+} |
|
48 |
+lgdist<-log(min(dist)) |
|
49 |
+#print(dist) |
|
50 |
+} |
|
51 |
+return(lgdist) |
|
52 |
+ |
|
53 |
+} |
|
54 |
+ |
|
55 |
+m<-md<-snpn<-rep(0,nch) |
|
56 |
+for(i in 1:nch){ |
|
57 |
+chrmp<-subset(chromp,chrn==CHR[i]) |
|
58 |
+snpn[i]<-nrow(chrmp) |
|
59 |
+if(snpn[i]!=0){ |
|
60 |
+# m[i]<-0 |
|
61 |
+# md[i]<-0}else{ |
|
62 |
+ |
|
63 |
+#m[i]<-nsnpchr(subset(chromp,chrn==i)) |
|
64 |
+ |
|
65 |
+md[i]<-mdist(subset(chromp,chrn==CHR[i])) |
|
66 |
+ |
|
67 |
+} |
|
68 |
+ |
|
69 |
+} |
|
70 |
+ |
|
71 |
+ |
|
72 |
+#mlength<-meanl[1:20] |
|
73 |
+colors<-c("red","lightsalmon","lightseagreen","mediumspringgreen","mediumorchid4","mediumpurple4","lightskyblue4","mediumseagreen","deeppink", |
|
74 |
+"mediumvioletred","firebrick","magenta","navyblue","lightsteelblue2","navajowhite","blue","maroon4","orange2","purple","darkorchid1", |
|
75 |
+"black","cyan") |
|
76 |
+ |
|
77 |
+chrcol<-rep(NA,nch) |
|
78 |
+for(i in 1:nch){ |
|
79 |
+ chrcol[i]<-colors[i] |
|
80 |
+} |
|
81 |
+ |
|
82 |
+par(oma=c(2,1,2,1)) |
|
83 |
+bp<-barplot(md,width=2,ylab="log least length (kbp) of intervals between SNPs", names.arg =paste("chr",CHR,sep=""),col=chrcol,ylim=c(0,30), |
|
84 |
+main=main,cex.lab=2.0,cex.names=2,cex.main=2.5,cex.axis=1.5) |
|
85 |
+ |
|
86 |
+ |
|
87 |
+# space between lab and column |
|
88 |
+YY<-rep(NA,nch) |
|
89 |
+for(i in 1:nch){ |
|
90 |
+ if(!is.na(md[i])){ |
|
91 |
+if(md[i]>=10){ |
|
92 |
+ YY[i]<-md[i]+1.5 |
|
93 |
+ }else if(md[i]<10&md[i]>0){ |
|
94 |
+ YY[i]<-md[i]+1.5 |
|
95 |
+ }else if(md[i]==0){ |
|
96 |
+ YY[i]<-2 |
|
97 |
+ } |
|
98 |
+ } |
|
99 |
+} |
|
100 |
+#text(x=bp,y=YY,labels=n,col="black") |
|
101 |
+# for data SNP247 |
|
102 |
+text(x=bp,y=YY,labels=snpn,col="black",cex=2.5) |
|
103 |
+# for data SNP368 |
|
104 |
+#return(m) |
|
105 |
+} |
0 | 106 |
deleted file mode 100644 |
... | ... |
@@ -1,108 +0,0 @@ |
1 |
-snpposit <- |
|
2 |
-function(SNPdata,SNP_hg19="chr",X="no",LG= 10,main="A",maxd=2000){ |
|
3 |
-SNPdata<-as.data.frame(SNPdata) |
|
4 |
-if(SNP_hg19!="chr"){ |
|
5 |
-hg19<-as.character(SNPdata$SNP_hg19) |
|
6 |
-#print(hg19[1:10]) |
|
7 |
- |
|
8 |
-chromp<-chrp(hg19) |
|
9 |
-#print(chromp[1:6,]) |
|
10 |
-}else{ |
|
11 |
-chr<-SNPdata$chr |
|
12 |
-if(length(chr)==0){ |
|
13 |
-stop("no data") |
|
14 |
-} |
|
15 |
- |
|
16 |
-SNPdata<-SNPdata[with(SNPdata,order(chr,posit)),] |
|
17 |
-chrn<-as.numeric(SNPdata$chr) |
|
18 |
-posit<-as.numeric(SNPdata$posit) |
|
19 |
-chromp<-cbind(chrn,posit) |
|
20 |
- |
|
21 |
-#print(CHR) |
|
22 |
-} |
|
23 |
-chr<-chromp[,1] |
|
24 |
-CHR<-unique(chr) |
|
25 |
-nch<-length(CHR) # number of chromosomes |
|
26 |
-meanl<-m<-md<-rep(NA,nch) |
|
27 |
- |
|
28 |
-#calculate SNP number on each chromosome |
|
29 |
-nsnpchr<-function(x,LG){ |
|
30 |
-y<-as.numeric(x[,2]) |
|
31 |
-#print(y) |
|
32 |
-nr<-length(y) |
|
33 |
-y<-sort(y) |
|
34 |
-m<-0 |
|
35 |
-ix<-seq(nr-1) |
|
36 |
-iy<-seq(from=2,to=nr) |
|
37 |
-m<-length(which(y[iy]-y[ix]>=LG)) |
|
38 |
-return(m) |
|
39 |
-} |
|
40 |
- |
|
41 |
-#averaged distance between SNPs on each chromosome |
|
42 |
-mdist<-function(x,LG){ |
|
43 |
-x<-as.data.frame(x) |
|
44 |
-y<-x[,2] |
|
45 |
-#print(y) |
|
46 |
-nr<-length(y) |
|
47 |
- y<-sort(y) |
|
48 |
- mdist<-(y[nr]-y[1])/((nr-1)*LG) |
|
49 |
-return(mdist) |
|
50 |
- |
|
51 |
-} |
|
52 |
- |
|
53 |
-#calculate average distance between SNPs and SNP number on each chromosome |
|
54 |
-m<-md<-snpn<-rep(0,nch) |
|
55 |
-for(i in 1:nch){ |
|
56 |
-chrmp<-subset(chromp,chrn==i) |
|
57 |
-snpn[i]<-nrow(chrmp) |
|
58 |
-if(snpn[i]!=0){ |
|
59 |
-m[i]<-nsnpchr(subset(chromp,chrn==i),LG) |
|
60 |
-md[i]<-mdist(subset(chromp,chrn==i),LG) |
|
61 |
- } |
|
62 |
-} |
|
63 |
- |
|
64 |
-md[which(md>=maxd)]<-maxd |
|
65 |
-md[which(is.na(md))]<-0 |
|
66 |
-#print(length(md)) |
|
67 |
-#print(length(CHR)) |
|
68 |
-colors<-c("red","lightsalmon","lightseagreen","mediumspringgreen","mediumorchid4","mediumpurple4","lightskyblue4","mediumseagreen","deeppink", |
|
69 |
-"mediumvioletred","firebrick","magenta","navyblue","lightsteelblue2","navajowhite","blue","maroon4","orange2","purple","darkorchid1", |
|
70 |
-"black","cyan") |
|
71 |
- |
|
72 |
-chrcol<-rep(NA,nch) |
|
73 |
-for(i in 1:nch){ |
|
74 |
- chrcol[i]<-colors[i] |
|
75 |
-} |
|
76 |
-if(X=="yes"){ |
|
77 |
-CHR[nch]<-"X" |
|
78 |
-} |
|
79 |
-#print(CHR) |
|
80 |
-par(oma=c(2,1,2,1)) |
|
81 |
-bp<-barplot(md,width=2,ylab="mean length (kbp) of interval between SNPs", names.arg =paste("chr",CHR,sep=""),col=chrcol,ylim=c(0,(maxd+250)), |
|
82 |
-main=main,cex.lab=1.5,cex.names=1,cex.main=2.5,cex.axis=1.5) |
|
83 |
- |
|
84 |
-# space between lab and column |
|
85 |
-YY<-rep(NA,nch) |
|
86 |
-for(i in 1:nch){ |
|
87 |
- if(!is.na(md[i])){ |
|
88 |
-if(md[i]>=1000){ |
|
89 |
-YY[i]<-md[i]*1.05 |
|
90 |
- }else if(md[i]<1000&md[i]>=500){ |
|
91 |
- YY[i]<-md[i]*1.08 |
|
92 |
- }else if(md[i]<500&md[i]>=200){ |
|
93 |
- YY[i]<-md[i]*1.2 |
|
94 |
- }else if(md[i]<200&md[i]>=100){ |
|
95 |
- YY[i]<-(md[i]*1.8) |
|
96 |
- }else if(md[i]<100&md[i]>=50){ |
|
97 |
- YY[i]<-(md[i]*2.5) |
|
98 |
- }else if(md[i]<50&md[i]>0){ |
|
99 |
- YY[i]<-(md[i]*5) |
|
100 |
- }else if(md[i]==0){ |
|
101 |
- YY[i]<-45 |
|
102 |
- } |
|
103 |
- } |
|
104 |
-} |
|
105 |
-#text(x=bp,y=YY,labels=n,col="black") |
|
106 |
-text(x=bp,y=YY,labels=snpn,col="black",cex=1.3) |
|
107 |
-return(m) |
|
108 |
-} |
111 | 2 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,304 @@ |
1 |
+### R code from vignette source 'GMRP.Rnw' |
|
2 |
+ |
|
3 |
+################################################### |
|
4 |
+### code chunk number 1: <style-Sweave |
|
5 |
+################################################### |
|
6 |
+BiocStyle::latex() |
|
7 |
+ |
|
8 |
+#library("knitr") |
|
9 |
+#opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide", |
|
10 |
+ # fig.width=4,fig.height=4.5, |
|
11 |
+ # dpi=300,# increase dpi to avoid pixelised pngs |
|
12 |
+ # message=FALSE) |
|
13 |
+ |
|
14 |
+################################################### |
|
15 |
+### code chunk number 2: GMRP.Rnw:40-43 |
|
16 |
+################################################### |
|
17 |
+set.seed(102) |
|
18 |
+options(width = 90) |
|
19 |
+ |
|
20 |
+ |
|
21 |
+################################################### |
|
22 |
+### code chunk number 3: GMRP.Rnw:45-47 |
|
23 |
+################################################### |
|
24 |
+library(GMRP) |
|
25 |
+ |
|
26 |
+################################################### |
|
27 |
+### code chunk number 4: fmerge.Rnw:56-72 |
|
28 |
+################################################### |
|
29 |
+data1 <- matrix(NA, 20, 4) |
|
30 |
+data2 <- matrix(NA, 30, 7) |
|
31 |
+SNPID1 <- paste("rs", seq(1:20), sep="") |
|
32 |
+SNPID2 <- paste("rs", seq(1:30), sep="") |
|
33 |
+data1[,1:4] <- c(round(runif(20), 4), round(runif(20), 4), round(runif(20), 4), round(runif(20), 4)) |
|
34 |
+data2[,1:4] <- c(round(runif(30), 4),round(runif(30), 4), round(runif(30), 4), round(runif(30), 4)) |
|
35 |
+data2[,5:7] <- c(round(seq(30)*runif(30), 4), round(seq(30)*runif(30), 4), seq(30)) |
|
36 |
+data1 <- cbind(SNPID1, as.data.frame(data1)) |
|
37 |
+data2 <- cbind(SNPID2, as.data.frame(data2)) |
|
38 |
+dim(data1) |
|
39 |
+dim(data2) |
|
40 |
+colnames(data1) <- c("SNP", "var1", "var2", "var3", "var4") |
|
41 |
+colnames(data2) <- c("SNP", "var1", "var2", "var3", "var4", "V1", "V2", "V3") |
|
42 |
+data1<-DataFrame(data1) |
|
43 |
+data2<-DataFrame(data2) |
|
44 |
+data12 <- fmerge(fl1=data1, fl2=data2, ID1="SNP", ID2="SNP", A=".dat1", B=".dat2", method="No") |
|
45 |
+ |
|
46 |
+################################################### |
|
47 |
+### code chunk number 5: disease data load:90-93 |
|
48 |
+################################################### |
|
49 |
+data(cad.data) |
|
50 |
+#cad <- DataFrame(cad.data) |
|
51 |
+cad<-cad.data |
|
52 |
+head(cad) |
|
53 |
+################################################### |
|
54 |
+### code chunk number 6:lipid data load: 95-99 |
|
55 |
+################################################### |
|
56 |
+data(lpd.data) |
|
57 |
+#lpd <- DataFrame(lpd.data) |
|
58 |
+lpd<-lpd.data |
|
59 |
+head(lpd) |
|
60 |
+ |
|
61 |
+################################################### |
|
62 |
+### code chunk number 7: choose SNPs: 127-152 |
|
63 |
+################################################### |
|
64 |
+ |
|
65 |
+################################################### |
|
66 |
+# Step1: calculate pvj: 129-136 |
|
67 |
+################################################### |
|
68 |
+ |
|
69 |
+pvalue.LDL <- lpd$P.value.LDL |
|
70 |
+pvalue.HDL <-lpd$P.value.HDL |
|
71 |
+pvalue.TG <- lpd$P.value.TG |
|
72 |
+pvalue.TC <- lpd$P.value.TC |
|
73 |
+pv <- cbind(pvalue.LDL, pvalue.HDL, pvalue.TG, pvalue.TC) |
|
74 |
+pvj <- apply(pv, 1, min) |
|
75 |
+ |
|
76 |
+################################################### |
|
77 |
+# Step2: retrieve causal variables from data: 139-145 |
|
78 |
+################################################### |
|
79 |
+ |
|
80 |
+beta.LDL <- lpd$beta.LDL |
|
81 |
+beta.HDL <- lpd$beta.HDL |
|
82 |
+beta.TG <- lpd$beta.TG |
|
83 |
+beta.TC <- lpd$beta.TC |
|
84 |
+beta <- cbind(beta.LDL, beta.HDL, beta.TG, beta.TC) |
|
85 |
+ |
|
86 |
+################################################### |
|
87 |
+# Step3: construct a matrix for allele1: 148-154 |
|
88 |
+################################################### |
|
89 |
+ |
|
90 |
+a1.LDL <- lpd$A1.LDL |
|
91 |
+a1.HDL <- lpd$A1.HDL |
|
92 |
+a1.TG <- lpd$A1.TG |
|
93 |
+a1.TC <- lpd$A1.TC |
|
94 |
+alle1 <- cbind(a1.LDL, a1.HDL, a1.TG, a1.TC) |
|
95 |
+ |
|
96 |
+################################################### |
|
97 |
+# Step4: calculate pcj:157-165 |
|
98 |
+################################################### |
|
99 |
+ |
|
100 |
+N.LDL <- lpd$N.LDL |
|
101 |
+N.HDL <- lpd$N.HDL |
|
102 |
+N.TG <- lpd$N.TG |
|
103 |
+N.TC <- lpd$N.TC |
|
104 |
+ss <- cbind(N.LDL, N.HDL, N.TG, N.TC) |
|
105 |
+sm <- apply(ss,1,sum) |
|
106 |
+pcj <- round(sm/max(sm), 6) |
|
107 |
+ |
|
108 |
+################################################### |
|
109 |
+# Step5: Construct matrix for frequency of allele1:168-174 |
|
110 |
+################################################### |
|
111 |
+ |
|
112 |
+freq.LDL<-lpd$Freq.A1.1000G.EUR.LDL |
|
113 |
+freq.HDL<-lpd$Freq.A1.1000G.EUR.HDL |
|
114 |
+freq.TG<-lpd$Freq.A1.1000G.EUR.TG |
|
115 |
+freq.TC<-lpd$Freq.A1.1000G.EUR.TC |
|
116 |
+freq<-cbind(freq.LDL,freq.HDL,freq.TG,freq.TC) |
|
117 |
+ |
|
118 |
+################################################### |
|
119 |
+# Step6: construct matrix for sd of each causal variable: 177-183 |
|
120 |
+################################################### |
|
121 |
+ |
|
122 |
+sd.LDL <- rep(37.42, length(pvj)) |
|
123 |
+sd.HDL <- rep(14.87, length(pvj)) |
|
124 |
+sd.TG <-rep(92.73, length(pvj)) |
|
125 |
+sd.TC <- rep(42.74, length(pvj)) |
|
126 |
+sd <- cbind(sd.LDL, sd.HDL, sd.TG, sd.TC) |
|
127 |
+ |
|
128 |
+################################################### |
|
129 |
+# Step7: SNPID and position: 186-189 |
|
130 |
+################################################### |
|
131 |
+<<Step7, keep.source=TRUE, eval=FALSE>>= |
|
132 |
+hg19 <- lpd$SNP_hg19.HDL |
|
133 |
+rsid <- lpd$rsid.HDL |
|
134 |
+ |
|
135 |
+################################################### |
|
136 |
+# Step8: separate chromosome number and SNP position: 192-194 |
|
137 |
+################################################### |
|
138 |
+chr<-chrp(hg=hg19) |
|
139 |
+ |
|
140 |
+################################################### |
|
141 |
+# Step9: get new data: 197-201 |
|
142 |
+################################################### |
|
143 |
+ |
|
144 |
+newdata<-cbind(freq,beta,sd,pvj,ss,pcj) |
|
145 |
+newdata<-cbind(chr,rsid,alle1,as.data.frame(newdata)) |
|
146 |
+dim(newdata) |
|
147 |
+ |
|
148 |
+ |
|
149 |
+################################################### |
|
150 |
+# Step10: retrieve data from cad and calculate pdj:204-215 |
|
151 |
+################################################### |
|
152 |
+ |
|
153 |
+hg18.d <- cad$chr_pos_b36 |
|
154 |
+SNP.d <- cad$SNP #SNPID |
|
155 |
+a1.d<- tolower(cad$reference_allele) |
|
156 |
+freq.d <- cad$ref_allele_frequency |
|
157 |
+pvalue.d <- cad$pvalue |
|
158 |
+beta.d <- cad$log_odds |
|
159 |
+N.case <- cad$N_case |
|
160 |
+N.ctr <- cad$N_control |
|
161 |
+N.d <- N.case+N.ctr |
|
162 |
+freq.case <- N.case/N.d |
|
163 |
+ |
|
164 |
+################################################### |
|
165 |
+ # Step11: combine these cad variables into new data sheet:218-222 |
|
166 |
+ ################################################### |
|
167 |
+ |
|
168 |
+newcad <- cbind(freq.d, beta.d, N.case, N.ctr, freq.case) |
|
169 |
+newcad <- cbind(hg18.d, SNP.d, a1.d, as.data.frame(newcad)) |
|
170 |
+dim(newcad) |
|
171 |
+ |
|
172 |
+################################################### |
|
173 |
+#Step12: give name vector of causal variables: 225-227 |
|
174 |
+################################################### |
|
175 |
+varname <-c("CAD", "LDL", "HDL", "TG", "TC") |
|
176 |
+ |
|
177 |
+################################################### |
|
178 |
+### Step 13: choose SNPs and make beta table |
|
179 |
+################################################### |
|
180 |
+mybeta <- mktable(cdata=newdata, ddata=newcad, rt="beta", |
|
181 |
+varname=varname, LG=1, Pv=0.00000005, Pc=0.979, Pd=0.979) |
|
182 |
+dim(mybeta) |
|
183 |
+beta <- mybeta[,4:8] # standard beta table for path analysis |
|
184 |
+snp <- mybeta[,1:3] # snp data for annotation analysis |
|
185 |
+beta<-DataFrame(beta) |
|
186 |
+head(beta) |
|
187 |
+ |
|
188 |
+################################################### |
|
189 |
+### load beta data: 256-264 |
|
190 |
+################################################### |
|
191 |
+data(beta.data) |
|
192 |
+beta.data<-DataFrame(beta.data) |
|
193 |
+CAD <- beta.data$cad |
|
194 |
+LDL <- beta.data$ldl |
|
195 |
+HDL <- beta.data$hdl |
|
196 |
+TG <- beta.data$tg |
|
197 |
+TC <- beta.data$tc |
|
198 |
+ |
|
199 |
+################################################### |
|
200 |
+### two way scatter: 266-280 |
|
201 |
+################################################### |
|
202 |
+par(mfrow=c(2, 2), mar=c(5.1, 4.1, 4.1, 2.1), oma=c(0, 0, 0, 0)) |
|
203 |
+plot(LDL,CAD, pch=19, col="blue", xlab="beta of SNPs on LDL", |
|
204 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
205 |
+abline(lm(CAD~LDL), col="red", lwd=2) |
|
206 |
+plot(HDL, CAD, pch=19,col="blue", xlab="beta of SNPs on HDL", |
|
207 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
208 |
+abline(lm(CAD~HDL), col="red", lwd=2) |
|
209 |
+plot(TG, CAD, pch=19, col="blue", xlab="beta of SNPs on TG", |
|
210 |
+ylab="beta of SNP on CAD",cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
211 |
+abline(lm(CAD~TG), col="red", lwd=2) |
|
212 |
+plot(TC,CAD, pch=19, col="blue", xlab="beta of SNPs on TC", |
|
213 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
214 |
+abline(lm(CAD~TC), col="red", lwd=2) |
|
215 |
+ |
|
216 |
+################################################### |
|
217 |
+### MR and Path Analysis: 296-300 |
|
218 |
+################################################### |
|
219 |
+data(beta.data) |
|
220 |
+mybeta <- DataFrame(beta.data) |
|
221 |
+mod <- CAD~LDL+HDL+TG+TC |
|
222 |
+pathvalue <- path(betav=mybeta, model=mod, outcome="CAD") |
|
223 |
+ |
|
224 |
+################################################### |
|
225 |
+### Create Path Diagram: 305-312 |
|
226 |
+################################################### |
|
227 |
+ |
|
228 |
+mypath <- matrix(NA,3,4) |
|
229 |
+mypath[1,] <- c(1.000000, -0.066678, 0.420036, 0.764638) |
|
230 |
+mypath[2,] <- c(-0.066678, 1.000000, -0.559718, 0.496831) |
|
231 |
+mypath[3,] <- c(0.420036, -0.559718, 1.000000, 0.414346) |
|
232 |
+colnames(mypath) <- c("LDL", "HDL", "TG", "path") |
|
233 |
+mypath<-as.data.frame(mypath) |
|
234 |
+mypath |
|
235 |
+ |
|
236 |
+################################################### |
|
237 |
+### load package: diagram: 324-326 |
|
238 |
+################################################### |
|
239 |
+library(diagram) |
|
240 |
+ |
|
241 |
+################################################### |
|
242 |
+# make path diagram |
|
243 |
+################################################### |
|
244 |
+pathdiagram(pathdata=mypath, disease="CAD", R2=0.988243, range=c(1:3)) |
|
245 |
+ |
|
246 |
+pathD<-matrix(NA,4,5) |
|
247 |
+pathD[1,] <- c(1, -0.070161, 0.399038, 0.907127, 1.210474) |
|
248 |
+pathD[2,] <- c(-0.070161, 1, -0.552106, 0.212201, 0.147933) |
|
249 |
+pathD[3,] <- c(0.399038, -0.552106, 1, 0.44100, 0.64229) |
|
250 |
+pathD[4,] <- c(0.907127, 0.212201, 0.441007, 1, -1.035677) |
|
251 |
+colnames(pathD) <- c("LDL", "HDL", "TG", "TC", "path") |
|
252 |
+pathD<-as.data.frame(pathD) |
|
253 |
+pathD |
|
254 |
+ |
|
255 |
+pathdiagram2(pathD=pathD,pathO=mypath,rangeD=c(1:4),rangeO=c(1:3), |
|
256 |
+disease="CAD", R2D=0.536535,R2O=0.988243) |
|
257 |
+ |
|
258 |
+################################################### |
|
259 |
+# load SNP358 data |
|
260 |
+################################################### |
|
261 |
+ |
|
262 |
+data(SNP358.data) |
|
263 |
+SNP358 <- DataFrame(SNP358.data) |
|
264 |
+head(SNP358) |
|
265 |
+ |
|
266 |
+################################################### |
|
267 |
+# load package library(graphics) |
|
268 |
+################################################### |
|
269 |
+ |
|
270 |
+library(graphics) |
|
271 |
+ |
|
272 |
+################################################### |
|
273 |
+# chromosome SNP position histogram |
|
274 |
+########################################## |
|
275 |
+snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A") |
|
276 |
+ |
|
277 |
+ |
|
278 |
+################################################### |
|
279 |
+# SNP function annotation analysis |
|
280 |
+################################################### |
|
281 |
+ |
|
282 |
+################################################### |
|
283 |
+# load SNP annotation data: 431-434 |
|
284 |
+################################################### |
|
285 |
+ |
|
286 |
+data(SNP368annot.data) |
|
287 |
+SNP368<-DataFrame(SNP368annot.data) |
|
288 |
+SNP368[1:10, ] |
|
289 |
+ |
|
290 |
+################################################### |
|
291 |
+# load package plotrix |
|
292 |
+################################################### |
|
293 |
+ |
|
294 |
+################################################### |
|
295 |
+# make 3D Pie |
|
296 |
+############################################# |
|
297 |
+ucscannot(UCSCannot=SNP368,SNPn=368) |
|
298 |
+ |
|
299 |
+ucscannot(UCSCannot=SNP368,SNPn=368,A=3,B=2,C=1.3,method=2) |
|
300 |
+ |
|
301 |
+ |
|
302 |
+sessionInfo() |
|
303 |
+ |
|
304 |
+ |
... | ... |
@@ -9,25 +9,26 @@ |
9 | 9 |
\bold{GMRP} is used to perform Mendelian randomization analysis of causal variables on disease of study using SNP beta data from \bold{GWAS} or \bold{GWAS} meta analysis and furthermore execute path analysis of causal variables onto the disease. |
10 | 10 |
} |
11 | 11 |
\details{ |
12 |
-\bold{GMRP} can perform analyses of Mendelian randomization (MR),correlation, path of causal variables onto disease of interest and SNP annotation summarization analysis. MR includes SNP selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta values, \bold{GMRP} performs correlation and path analyses to construct path diagrams of causal variables to the disease. \bold{GMRP} consists of 8 functions: \emph{chrp, fmerg, mktable, pathdiagram2, pathdiagram, path, snpposit, ucscannot} and 5 datasets: \code{beta.data, cad.data, lpd.data, SNP358.data and SNP368_annot.data}. Function \emph{chrp} is used to separate string vector hg19 into two numeric vectors: chromosome number and SNP position on chromosomes. Function \emph{fmerg} is used to merge two datasets into one dataset. Function \emph{mktable} performs SNP selection and creates a standard beta table for function path to do path analyses. Function \emph{pathdiagram} is used to create a path diagram of causal variables onto the disease or onto outcome. Function \emph{pathdiagram2} can merg two-level pathdiagrams into one nested pathdiagram where inner path diagram is a path diagram of causal variables contributing onto outcome and the outside path diagram is a diagram of path of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \code{lpd.data} and \code{cad.data} provide an example to create a standard beta dataset for \emph{path} function to do path analysis and SNP data for SNP annotation analysis by performing \emph{mktable} and \emph{fmerg}. beta.data are a standard beta dataset for path analysis. \code{SNP358.data} provide an example for function \emph{snpposit} to do SNP position annotation analysis and \code{SNP368_annot.data} are for function \emph{ucscannot} to do SNP function annotation analysis. It is specially emphasized that except for that making standard beta table using \emph{mktable} must be done in Unix/Linux system,GMRP can be performed in Windows or Mac OS. This is because GWAS datasets usually are very huge but standard beta table is small. If users'Unix/Linux system has X11 or the other graphics system, then user should perform GMRP in Unix/Linux system, otherwise, user should transfer a standard beta table to a local computer and run GMRP in it. |
|
12 |
+\bold{GMRP} can perform analyses of Mendelian randomization (MR),correlation, path of causal variables onto disease of interest and SNP annotation summarization analysis. MR includes SNP selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta values, \bold{GMRP} performs correlation and path analyses to construct path diagrams of causal variables to the disease. \bold{GMRP} consists |
|
13 |
+of 8 functions: \emph{chrp, fmerge, mktable, pathdiagram2, pathdiagram, path, snpPositAnnot, ucscannot} and 5 datasets: \code{beta.data, cad.data, lpd.data, SNP358.data and SNP368_annot.data}. Function \emph{chrp} is used to separate string vector hg19 into two numeric vectors: chromosome number and SNP position on chromosomes. Function \emph{fmerge} is used to merge two datasets into one dataset. Function \emph{mktable} performs SNP selection and creates a standard beta table for |
|
14 |
+function path to do path analyses. Function \emph{pathdiagram} is used to create a path diagram of causal variables onto the disease or onto outcome. Function \emph{pathdiagram2} can merge two-level pathdiagrams into one nested pathdiagram where inner path diagram is a path diagram of causal variables contributing onto outcome and the outside path diagram is a diagram of path of causal variables including outcome onto the disease. The five datasets provide examples for running these |
|
15 |
+functions. \code{lpd.data} and \code{cad.data} provide an example to create a standard beta dataset for \emph{path} function to do path analysis and SNP data for SNP annotation analysis by performing \emph{mktable} and \emph{fmerge}. beta.data are a standard beta dataset for path analysis. \code{SNP358.data} provide an example for function \emph{snpPositAnnot} to do SNP position annotation analysis and \code{SNP368_annot.data} are for function \emph{ucscannot} to do SNP function annotation analysis. It is specially emphasized that except for that making standard beta table using \emph{mktable} must be done in Unix/Linux system,GMRP can be performed in Windows or Mac OS. This is because GWAS datasets usually are very huge but standard beta table is small. If users'Unix/Linux system has X11 or the other graphics system, then user should perform GMRP in Unix/Linux system, otherwise, user should transfer a standard beta table to a local computer and run GMRP in it. |
|
13 | 16 |
|
14 | 17 |
} |
15 | 18 |
\author{ |
16 | 19 |
Yuan-De Tan |
17 | 20 |
\email{tanyuande@gmail.com} |
18 |
- |
|
19 |
-\\Dajiang Liu |
|
20 |
- |
|
21 |
-\\Maintainer: Yuan-De Tan |
|
21 |
+\cr |
|
22 |
+Maintainer: Yuan-De Tan |
|
22 | 23 |
} |
23 | 24 |
\references{ |
24 | 25 |
Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. Nat Genet 45: 1345-1352. |
25 |
- |
|
26 |
-\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177. |
|
27 |
- |
|
28 |
-\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. Methods Mol Biol 713: 153-166.\\Wright, S. 1921. Correlation and causation. J. Agricultural Research 20: 557-585. |
|
29 |
- |
|
30 |
-\\Wright, S. 1934. The method of path coefficients Annals of Mathematical Statistics 5 (3): 161-215 5: 161-215. |
|
26 |
+\cr |
|
27 |
+Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177. |
|
28 |
+\cr |
|
29 |
+Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. Methods Mol Biol 713: 153-166.\\Wright, S. 1921. Correlation and causation. J. Agricultural Research 20: 557-585. |
|
30 |
+\cr |
|
31 |
+Wright, S. 1934. The method of path coefficients Annals of Mathematical Statistics 5 (3): 161-215 5: 161-215. |
|
31 | 32 |
|
32 | 33 |
} |
33 | 34 |
\keyword{ package } |
... | ... |
@@ -17,7 +17,7 @@ Data of 358 SNPs |
17 | 17 |
} |
18 | 18 |
} |
19 | 19 |
\details{ |
20 |
-These 358 SNPs were chosen by using mktable with Pv = code{5x10e-08},\code{Pc = Pd =0.979} from code{lpd.data and cad.data}. They provide a data example for how to perform annotation analysis of SNP positions. |
|
20 |
+These 358 SNPs were chosen by using mktable with Pv = \code{5x10e-08},\code{Pc = Pd =0.979} from \code{lpd.data and cad.data}. They provide a data example for how to perform annotation analysis of SNP positions. |
|
21 | 21 |
} |
22 | 22 |
\value{ |
23 | 23 |
A set of data with 358 rows(SNPs) and 3 columns(SNP ID, chromosome # and SNP position on chromosomes). |
24 | 24 |
similarity index 81% |
25 | 25 |
rename from man/fmerg.Rd |
26 | 26 |
rename to man/fmerge.Rd |
... | ... |
@@ -1,5 +1,5 @@ |
1 |
-\name{fmerg} |
|
2 |
-\alias{fmerg} |
|
1 |
+\name{fmerge} |
|
2 |
+\alias{fmerge} |
|
3 | 3 |
\title{ |
4 | 4 |
Merge Two \bold{GWAS} Result Data Sheets |
5 | 5 |
} |
... | ... |
@@ -7,7 +7,7 @@ Merge Two \bold{GWAS} Result Data Sheets |
7 | 7 |
\emph{fmerg} can be used to merge two \bold{GWAS} result data sheets with the same key \verb{ID(SNP ID)} into one data sheet. |
8 | 8 |
} |
9 | 9 |
\usage{ |
10 |
-fmerg(fl1, fl2,ID1, ID2, A, B, method) |
|
10 |
+fmerge(fl1, fl2,ID1, ID2, A, B, method) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{fl1}{ |
... | ... |
@@ -33,7 +33,8 @@ method for merging. See details. |
33 | 33 |
} |
34 | 34 |
} |
35 | 35 |
\details{ |
36 |
-fl1 and fl2 are two \bold{GWAS} result data files from different studies or with different risk variables. They contain \verb{SNPID}, \verb{hg18}, \verb{hg19}(positions), beta values, allele, frequency, and so on. The method has four options: method="No","NO" or "no" means that all data with unmatch \verb{SNP}s are not saved in the merged file; method="All","ALL" or "all" lets fmerg save all the data with unmatched \verb{SNP}s from two files but they are not paired one-by-one. This is different from R \emph{merge} function. method="file1" will save the data with unmatched \verb{SNP}s only from file 1 in the merged file and method="file2" allows function \emph{fmerg} to save the data with unmatched \verb{SNP}s from file2 in the merged file. |
|
36 |
+fl1 and fl2 are two \bold{GWAS} result data files from different studies or with different risk variables. They contain \verb{SNPID}, \verb{hg18}, \verb{hg19}(positions), beta values, allele, frequency, and so on. The method has four options: method="No","NO" or "no" means that all data with unmatch \verb{SNP}s are not saved in the merged file; method="All","ALL" or "all" lets fmerge save all the data with unmatched \verb{SNP}s from two files but they are not paired one-by-one. This is |
|
37 |
+different from R \emph{merge} function. method="file1" will save the data with unmatched \verb{SNP}s only from file 1 in the merged file and method="file2" allows function \emph{fmerge} to save the data with unmatched \verb{SNP}s from file2 in the merged file. |
|
37 | 38 |
|
38 | 39 |
} |
39 | 40 |
\value{ |
... | ... |
@@ -43,8 +44,6 @@ Return a joined data sheet. |
43 | 44 |
\author{ |
44 | 45 |
Yuan-De Tan |
45 | 46 |
\email{tanyuande@gmail.com} |
46 |
- |
|
47 |
-\\Dajiang Liu |
|
48 | 47 |
} |
49 | 48 |
\note{ |
50 | 49 |
Function fmerg can also be applied to the other types of data. |
... | ... |
@@ -67,7 +66,7 @@ dim(data1) |
67 | 66 |
dim(data2) |
68 | 67 |
colnames(data1)<-c("SNP","var1","var2","var3","var4") |
69 | 68 |
colnames(data2)<-c("SNP","var1","var2","var3","var4","V1","V2","V3") |
70 |
-data12<-fmerg(fl1=data1,fl2=data2,ID1="SNP",ID2="SNP",A=".dat1",B=".dat2",method="No") |
|
69 |
+data12<-fmerge(fl1=data1,fl2=data2,ID1="SNP",ID2="SNP",A=".dat1",B=".dat2",method="No") |
|
71 | 70 |
#data12[1:3,] |
72 | 71 |
# SNP.dat1 var1.dat1 var2.dat1 var3.dat1 var4.dat1 SNP.dat2 var1.dat2 var2.dat2 |
73 | 72 |
#1 rs1 0.9152 0.9853 0.9879 0.9677 rs1 0.5041 0.5734 |
... | ... |
@@ -59,18 +59,16 @@ Return a standard \verb{SNP} beta or \verb{SNP} path table containing \code{m} \ |
59 | 59 |
} |
60 | 60 |
\references{ |
61 | 61 |
Do, R. et al. 2013. Common variants associated with plasma triglycerides and risk for coronary artery disease. \emph{Nat Genet} \bold{45}: 1345-1352. |
62 |
- |
|
63 |
-\\Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. \emph{PLoS Med} \bold{5}: e177. |
|
64 |
- |
|
65 |
-\\Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166. |
|
66 |
- |
|
67 |
-\\Willer, C.J. Schmidt, E.M. Sengupta, S. Peloso, G.M. Gustafsson, S. Kanoni, S. Ganna, A. Chen, J.,Buchkovich, M.L. Mora, S. et al (2013) Discovery and refinement of loci associated with lipid levels. \emph{Nat Genet} \bold{45}: 1274-1283. |
|
62 |
+\cr |
|
63 |
+Sheehan, N.A. et al. 2008. Mendelian randomisation and causal inference in observational epidemiology. \emph{PLoS Med} \bold{5}: e177. |
|
64 |
+\cr |
|
65 |
+Sheehan, N.A.,et al. 2010. Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166. |
|
66 |
+\cr |
|
67 |
+Willer, C.J. Schmidt, E.M. Sengupta, S. Peloso, G.M. Gustafsson, S. Kanoni, S. Ganna, A. Chen, J.,Buchkovich, M.L. Mora, S. et al (2013) Discovery and refinement of loci associated with lipid levels. \emph{Nat Genet} \bold{45}: 1274-1283. |
|
68 | 68 |
} |
69 | 69 |
\author{ |
70 | 70 |
Yuan-De Tan |
71 | 71 |
\email{tanyuande@gmail.com} |
72 |
- |
|
73 |
-\\Dajiang Liu |
|
74 | 72 |
} |
75 | 73 |
\note{ |
76 | 74 |
The order of column variables must be \verb{chrn} \verb{posit} \verb{rsid} \code{a1.x1} \dots \code{a1.xn} \code{freq.x1} \dots \code{freq.xn} \code{beta.x1} \dots \code{beta.x1} \dots \code{beta.xn} \code{sd.x1} \dots \code{sd.xn} \dots otherwise, mktable would have error. see example. |
... | ... |
@@ -81,10 +79,11 @@ The order of column variables must be \verb{chrn} \verb{posit} \verb{rsid} \code |
81 | 79 |
} |
82 | 80 |
\examples{ |
83 | 81 |
data(lpd.data) |
84 |
-lpd<-DataFrame(lpd.data) |
|
82 |
+#lpd<-DataFrame(lpd.data) |
|
83 |
+lpd<-lpd.data |
|
85 | 84 |
data(cad.data) |
86 |
-cad<-DataFrame(cad.data) |
|
87 |
- |
|
85 |
+#cad<-DataFrame(cad.data) |
|
86 |
+cad<-cad.data |
|
88 | 87 |
# step 1: calculate pvj |
89 | 88 |
pvalue.LDL<-lpd$P.value.LDL |
90 | 89 |
pvalue.HDL<-lpd$P.value.HDL |
... | ... |
@@ -29,20 +29,18 @@ Return three matrices: beta coefficients of regressions of risk variables on out |
29 | 29 |
} |
30 | 30 |
\references{ |
31 | 31 |
Do, R. et al. 2013 Common variants associated with plasma triglycerides and risk for coronary artery disease. \emph{Nat Genet} \bold{45}: 1345-1352. |
32 |
- |
|
33 |
-\\Sheehan, N.A. et al. 2008 Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177. |
|
34 |
- |
|
35 |
-\\Sheehan, N.A.,et al. 2010 Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166. |
|
36 |
- |
|
37 |
-\\Wright, S. 1921 Correlation and causation. \emph{J.Agricultural Research} \bold{20}: 557-585. |
|
38 |
- |
|
39 |
-\\Wright, S. 1934 The method of path coefficients. \emph{Annals of Mathematical Statistics} \bold{5} (3): 161-215. |
|
32 |
+\cr |
|
33 |
+Sheehan, N.A. et al. 2008 Mendelian randomisation and causal inference in observational epidemiology. PLoS Med 5: e177. |
|
34 |
+\cr |
|
35 |
+Sheehan, N.A.,et al. 2010 Mendelian randomisation: a tool for assessing causality in observational epidemiology. \emph{Methods Mol Biol} \bold{713}: 153-166. |
|
36 |
+\cr |
|
37 |
+Wright, S. 1921 Correlation and causation. \emph{J.Agricultural Research} \bold{20}: 557-585. |
|
38 |
+\cr |
|
39 |
+Wright, S. 1934 The method of path coefficients. \emph{Annals of Mathematical Statistics} \bold{5} (3): 161-215. |
|
40 | 40 |
} |
41 | 41 |
\author{ |
42 | 42 |
Yuan-De Tan |
43 | 43 |
\email{tanyuande@gmail.com} |
44 |
- |
|
45 |
-\\Dajiang Liu |
|
46 | 44 |
} |
47 | 45 |
\note{ |
48 | 46 |
betav may also be a matrix of SNP path coefficients onto risk variables and disease. |
49 | 48 |
similarity index 66% |
50 | 49 |
rename from man/snpposit.Rd |
51 | 50 |
rename to man/snpPositAnnot.Rd |
... | ... |
@@ -1,5 +1,5 @@ |
1 |
-\name{snpposit} |
|
2 |
-\alias{snpposit} |
|
1 |
+\name{snpPositAnnot} |
|
2 |
+\alias{snpPositAnnot} |
|
3 | 3 |
\title{ |
4 | 4 |
\emph{SNP} Position Annotation |
5 | 5 |
} |
... | ... |
@@ -7,7 +7,7 @@ |
7 | 7 |
This function is used to perform position annotation analysis of \verb{SNP}s chosen from \verb{GWAS}. |
8 | 8 |
} |
9 | 9 |
\usage{ |
10 |
-snpposit(SNPdata, SNP_hg19="chr", X="no",LG=10, main="A", maxd=2000) |
|
10 |
+snpPositAnnot(SNPdata, SNP_hg19="chr", main) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{SNPdata}{ |
... | ... |
@@ -16,18 +16,9 @@ snpposit(SNPdata, SNP_hg19="chr", X="no",LG=10, main="A", maxd=2000) |
16 | 16 |
\item{SNP_hg19}{ |
17 | 17 |
a string parameter. It may be "hg19" or "chr". If SNP_hg19="hg19",then \verb{SNPdata} contain a string vector of hg19 or if \verb{SNP_hg19}="chr", then \verb{SNPdata} consist of at lest two columns: \verb{chr} and \verb{posit}. \verb{chr} is chromosome number and posit is \verb{SNP} physical position on chromosomes. If data sheet has chromosome X, then character "X" should be changed to 23 in chr vector or chr23.######## in hg19 vector. |
18 | 18 |
} |
19 |
- \item{X}{ |
|
20 |
-character parameter. It has Two options: X="yes" or X="no". Default is X="no". |
|
21 |
-} |
|
22 |
- \item{LG}{ |
|
23 |
-a numeric parameter that gives maximum permissible distance between positions. |
|
24 |
-} |
|
25 |
- \item{main}{ |
|
26 |
-a string which is title of graph. If no title is given, then man="". |
|
27 |
-} |
|
28 |
- \item{maxd}{ |
|
29 |
-maxd is a numeric parameter that is maximum distance for truncating chromosome columns. If there are not too big differences among 23 chromosomes, then \verb{maxd} can be set to be larger than 2000. |
|
30 |
-} |
|
19 |
+ \item{main}{ |
|
20 |
+a string which is title of graph. If no title is given, then main="". |
|
21 |
+ } |
|
31 | 22 |
} |
32 | 23 |
\value{ |
33 | 24 |
Return a set of numbers of SNPs between which interval length > \bold{LG} on 23 chromosomes. This function also creates a histogram for averaged distances between SNPs and SNP numbers on chromosomes. |
... | ... |
@@ -35,8 +26,6 @@ Return a set of numbers of SNPs between which interval length > \bold{LG} on 23 |
35 | 26 |
\author{ |
36 | 27 |
Yuan-De Tan |
37 | 28 |
\email{tanyuande@gmail.com} |
38 |
- |
|
39 |
-\\Dajiang Liu |
|
40 | 29 |
} |
41 | 30 |
\note{ |
42 | 31 |
This function can also be applied to hg18 data with \code{SNP_hg19}="hg18". |
... | ... |
@@ -48,7 +37,7 @@ This function can also be applied to hg18 data with \code{SNP_hg19}="hg18". |
48 | 37 |
\examples{ |
49 | 38 |
data(SNP358.data) |
50 | 39 |
SNP358<-DataFrame(SNP358.data) |
51 |
-snpposit(SNPdata=SNP358,SNP_hg19="chr",X="Yes",LG=10,main="A",maxd=2000) |
|
40 |
+snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A") |
|
52 | 41 |
} |
53 | 42 |
\keyword{graphics} |
54 | 43 |
\keyword{SNP position} |
... | ... |
@@ -33,7 +33,6 @@ Create a color \emph{pie3D} diagram and return a set of numeric values: proporti |
33 | 33 |
Yuan-De Tan |
34 | 34 |
\email{tanyuande@gmail.com} |
35 | 35 |
|
36 |
-\\Dajiang Liu |
|
37 | 36 |
} |
38 | 37 |
\note{ |
39 | 38 |
This function just need data of "Predicted function" and "symbol", so the other column data in UCSCannot do not impact the results of analysis. |
40 | 39 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,304 @@ |
1 |
+### R code from vignette source 'GMRP.Rnw' |
|
2 |
+ |
|
3 |
+################################################### |
|
4 |
+### code chunk number 1: <style-Sweave |
|
5 |
+################################################### |
|
6 |
+BiocStyle::latex() |
|
7 |
+ |
|
8 |
+#library("knitr") |
|
9 |
+#opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide", |
|
10 |
+ # fig.width=4,fig.height=4.5, |
|
11 |
+ # dpi=300,# increase dpi to avoid pixelised pngs |
|
12 |
+ # message=FALSE) |
|
13 |
+ |
|
14 |
+################################################### |
|
15 |
+### code chunk number 2: GMRP.Rnw:40-43 |
|
16 |
+################################################### |
|
17 |
+set.seed(102) |
|
18 |
+options(width = 90) |
|
19 |
+ |
|
20 |
+ |
|
21 |
+################################################### |
|
22 |
+### code chunk number 3: GMRP.Rnw:45-47 |
|
23 |
+################################################### |
|
24 |
+library(GMRP) |
|
25 |
+ |
|
26 |
+################################################### |
|
27 |
+### code chunk number 4: fmerge.Rnw:56-72 |
|
28 |
+################################################### |
|
29 |
+data1 <- matrix(NA, 20, 4) |
|
30 |
+data2 <- matrix(NA, 30, 7) |
|
31 |
+SNPID1 <- paste("rs", seq(1:20), sep="") |
|
32 |
+SNPID2 <- paste("rs", seq(1:30), sep="") |
|
33 |
+data1[,1:4] <- c(round(runif(20), 4), round(runif(20), 4), round(runif(20), 4), round(runif(20), 4)) |
|
34 |
+data2[,1:4] <- c(round(runif(30), 4),round(runif(30), 4), round(runif(30), 4), round(runif(30), 4)) |
|
35 |
+data2[,5:7] <- c(round(seq(30)*runif(30), 4), round(seq(30)*runif(30), 4), seq(30)) |
|
36 |
+data1 <- cbind(SNPID1, as.data.frame(data1)) |
|
37 |
+data2 <- cbind(SNPID2, as.data.frame(data2)) |
|
38 |
+dim(data1) |
|
39 |
+dim(data2) |
|
40 |
+colnames(data1) <- c("SNP", "var1", "var2", "var3", "var4") |
|
41 |
+colnames(data2) <- c("SNP", "var1", "var2", "var3", "var4", "V1", "V2", "V3") |
|
42 |
+data1<-DataFrame(data1) |
|
43 |
+data2<-DataFrame(data2) |
|
44 |
+data12 <- fmerge(fl1=data1, fl2=data2, ID1="SNP", ID2="SNP", A=".dat1", B=".dat2", method="No") |
|
45 |
+ |
|
46 |
+################################################### |
|
47 |
+### code chunk number 5: disease data load:90-93 |
|
48 |
+################################################### |
|
49 |
+data(cad.data) |
|
50 |
+#cad <- DataFrame(cad.data) |
|
51 |
+cad<-cad.data |
|
52 |
+head(cad) |
|
53 |
+################################################### |
|
54 |
+### code chunk number 6:lipid data load: 95-99 |
|
55 |
+################################################### |
|
56 |
+data(lpd.data) |
|
57 |
+#lpd <- DataFrame(lpd.data) |
|
58 |
+lpd<-lpd.data |
|
59 |
+head(lpd) |
|
60 |
+ |
|
61 |
+################################################### |
|
62 |
+### code chunk number 7: choose SNPs: 127-152 |
|
63 |
+################################################### |
|
64 |
+ |
|
65 |
+################################################### |
|
66 |
+# Step1: calculate pvj: 129-136 |
|
67 |
+################################################### |
|
68 |
+ |
|
69 |
+pvalue.LDL <- lpd$P.value.LDL |
|
70 |
+pvalue.HDL <-lpd$P.value.HDL |
|
71 |
+pvalue.TG <- lpd$P.value.TG |
|
72 |
+pvalue.TC <- lpd$P.value.TC |
|
73 |
+pv <- cbind(pvalue.LDL, pvalue.HDL, pvalue.TG, pvalue.TC) |
|
74 |
+pvj <- apply(pv, 1, min) |
|
75 |
+ |
|
76 |
+################################################### |
|
77 |
+# Step2: retrieve causal variables from data: 139-145 |
|
78 |
+################################################### |
|
79 |
+ |
|
80 |
+beta.LDL <- lpd$beta.LDL |
|
81 |
+beta.HDL <- lpd$beta.HDL |
|
82 |
+beta.TG <- lpd$beta.TG |
|
83 |
+beta.TC <- lpd$beta.TC |
|
84 |
+beta <- cbind(beta.LDL, beta.HDL, beta.TG, beta.TC) |
|
85 |
+ |
|
86 |
+################################################### |
|
87 |
+# Step3: construct a matrix for allele1: 148-154 |
|
88 |
+################################################### |
|
89 |
+ |
|
90 |
+a1.LDL <- lpd$A1.LDL |
|
91 |
+a1.HDL <- lpd$A1.HDL |
|
92 |
+a1.TG <- lpd$A1.TG |
|
93 |
+a1.TC <- lpd$A1.TC |
|
94 |
+alle1 <- cbind(a1.LDL, a1.HDL, a1.TG, a1.TC) |
|
95 |
+ |
|
96 |
+################################################### |
|
97 |
+# Step4: calculate pcj:157-165 |
|
98 |
+################################################### |
|
99 |
+ |
|
100 |
+N.LDL <- lpd$N.LDL |
|
101 |
+N.HDL <- lpd$N.HDL |
|
102 |
+N.TG <- lpd$N.TG |
|
103 |
+N.TC <- lpd$N.TC |
|
104 |
+ss <- cbind(N.LDL, N.HDL, N.TG, N.TC) |
|
105 |
+sm <- apply(ss,1,sum) |
|
106 |
+pcj <- round(sm/max(sm), 6) |
|
107 |
+ |
|
108 |
+################################################### |
|
109 |
+# Step5: Construct matrix for frequency of allele1:168-174 |
|
110 |
+################################################### |
|
111 |
+ |
|
112 |
+freq.LDL<-lpd$Freq.A1.1000G.EUR.LDL |
|
113 |
+freq.HDL<-lpd$Freq.A1.1000G.EUR.HDL |
|
114 |
+freq.TG<-lpd$Freq.A1.1000G.EUR.TG |
|
115 |
+freq.TC<-lpd$Freq.A1.1000G.EUR.TC |
|
116 |
+freq<-cbind(freq.LDL,freq.HDL,freq.TG,freq.TC) |
|
117 |
+ |
|
118 |
+################################################### |
|
119 |
+# Step6: construct matrix for sd of each causal variable: 177-183 |
|
120 |
+################################################### |
|
121 |
+ |
|
122 |
+sd.LDL <- rep(37.42, length(pvj)) |
|
123 |
+sd.HDL <- rep(14.87, length(pvj)) |
|
124 |
+sd.TG <-rep(92.73, length(pvj)) |
|
125 |
+sd.TC <- rep(42.74, length(pvj)) |
|
126 |
+sd <- cbind(sd.LDL, sd.HDL, sd.TG, sd.TC) |
|
127 |
+ |
|
128 |
+################################################### |
|
129 |
+# Step7: SNPID and position: 186-189 |
|
130 |
+################################################### |
|
131 |
+<<Step7, keep.source=TRUE, eval=FALSE>>= |
|
132 |
+hg19 <- lpd$SNP_hg19.HDL |
|
133 |
+rsid <- lpd$rsid.HDL |
|
134 |
+ |
|
135 |
+################################################### |
|
136 |
+# Step8: separate chromosome number and SNP position: 192-194 |
|
137 |
+################################################### |
|
138 |
+chr<-chrp(hg=hg19) |
|
139 |
+ |
|
140 |
+################################################### |
|
141 |
+# Step9: get new data: 197-201 |
|
142 |
+################################################### |
|
143 |
+ |
|
144 |
+newdata<-cbind(freq,beta,sd,pvj,ss,pcj) |
|
145 |
+newdata<-cbind(chr,rsid,alle1,as.data.frame(newdata)) |
|
146 |
+dim(newdata) |
|
147 |
+ |
|
148 |
+ |
|
149 |
+################################################### |
|
150 |
+# Step10: retrieve data from cad and calculate pdj:204-215 |
|
151 |
+################################################### |
|
152 |
+ |
|
153 |
+hg18.d <- cad$chr_pos_b36 |
|
154 |
+SNP.d <- cad$SNP #SNPID |
|
155 |
+a1.d<- tolower(cad$reference_allele) |
|
156 |
+freq.d <- cad$ref_allele_frequency |
|
157 |
+pvalue.d <- cad$pvalue |
|
158 |
+beta.d <- cad$log_odds |
|
159 |
+N.case <- cad$N_case |
|
160 |
+N.ctr <- cad$N_control |
|
161 |
+N.d <- N.case+N.ctr |
|
162 |
+freq.case <- N.case/N.d |
|
163 |
+ |
|
164 |
+################################################### |
|
165 |
+ # Step11: combine these cad variables into new data sheet:218-222 |
|
166 |
+ ################################################### |
|
167 |
+ |
|
168 |
+newcad <- cbind(freq.d, beta.d, N.case, N.ctr, freq.case) |
|
169 |
+newcad <- cbind(hg18.d, SNP.d, a1.d, as.data.frame(newcad)) |
|
170 |
+dim(newcad) |
|
171 |
+ |
|
172 |
+################################################### |
|
173 |
+#Step12: give name vector of causal variables: 225-227 |
|
174 |
+################################################### |
|
175 |
+varname <-c("CAD", "LDL", "HDL", "TG", "TC") |
|
176 |
+ |
|
177 |
+################################################### |
|
178 |
+### Step 13: choose SNPs and make beta table |
|
179 |
+################################################### |
|
180 |
+mybeta <- mktable(cdata=newdata, ddata=newcad, rt="beta", |
|
181 |
+varname=varname, LG=1, Pv=0.00000005, Pc=0.979, Pd=0.979) |
|
182 |
+dim(mybeta) |
|
183 |
+beta <- mybeta[,4:8] # standard beta table for path analysis |
|
184 |
+snp <- mybeta[,1:3] # snp data for annotation analysis |
|
185 |
+beta<-DataFrame(beta) |
|
186 |
+head(beta) |
|
187 |
+ |
|
188 |
+################################################### |
|
189 |
+### load beta data: 256-264 |
|
190 |
+################################################### |
|
191 |
+data(beta.data) |
|
192 |
+beta.data<-DataFrame(beta.data) |
|
193 |
+CAD <- beta.data$cad |
|
194 |
+LDL <- beta.data$ldl |
|
195 |
+HDL <- beta.data$hdl |
|
196 |
+TG <- beta.data$tg |
|
197 |
+TC <- beta.data$tc |
|
198 |
+ |
|
199 |
+################################################### |
|
200 |
+### two way scatter: 266-280 |
|
201 |
+################################################### |
|
202 |
+par(mfrow=c(2, 2), mar=c(5.1, 4.1, 4.1, 2.1), oma=c(0, 0, 0, 0)) |
|
203 |
+plot(LDL,CAD, pch=19, col="blue", xlab="beta of SNPs on LDL", |
|
204 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
205 |
+abline(lm(CAD~LDL), col="red", lwd=2) |
|
206 |
+plot(HDL, CAD, pch=19,col="blue", xlab="beta of SNPs on HDL", |
|
207 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
208 |
+abline(lm(CAD~HDL), col="red", lwd=2) |
|
209 |
+plot(TG, CAD, pch=19, col="blue", xlab="beta of SNPs on TG", |
|
210 |
+ylab="beta of SNP on CAD",cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
211 |
+abline(lm(CAD~TG), col="red", lwd=2) |
|
212 |
+plot(TC,CAD, pch=19, col="blue", xlab="beta of SNPs on TC", |
|
213 |
+ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2) |
|
214 |
+abline(lm(CAD~TC), col="red", lwd=2) |
|
215 |
+ |
|
216 |
+################################################### |
|
217 |
+### MR and Path Analysis: 296-300 |
|
218 |
+################################################### |
|
219 |
+data(beta.data) |
|
220 |
+mybeta <- DataFrame(beta.data) |
|
221 |
+mod <- CAD~LDL+HDL+TG+TC |
|
222 |
+pathvalue <- path(betav=mybeta, model=mod, outcome="CAD") |
|
223 |
+ |
|
224 |
+################################################### |
|
225 |
+### Create Path Diagram: 305-312 |
|
226 |
+################################################### |
|
227 |
+ |
|
228 |
+mypath <- matrix(NA,3,4) |
|
229 |
+mypath[1,] <- c(1.000000, -0.066678, 0.420036, 0.764638) |
|
230 |
+mypath[2,] <- c(-0.066678, 1.000000, -0.559718, 0.496831) |
|
231 |
+mypath[3,] <- c(0.420036, -0.559718, 1.000000, 0.414346) |
|
232 |
+colnames(mypath) <- c("LDL", "HDL", "TG", "path") |
|
233 |
+mypath<-as.data.frame(mypath) |
|
234 |
+mypath |
|
235 |
+ |
|
236 |
+################################################### |
|
237 |
+### load package: diagram: 324-326 |
|
238 |
+################################################### |
|
239 |
+library(diagram) |
|
240 |
+ |
|
241 |
+################################################### |
|
242 |
+# make path diagram |
|
243 |
+################################################### |
|
244 |
+pathdiagram(pathdata=mypath, disease="CAD", R2=0.988243, range=c(1:3)) |
|
245 |
+ |
|
246 |
+pathD<-matrix(NA,4,5) |
|
247 |
+pathD[1,] <- c(1, -0.070161, 0.399038, 0.907127, 1.210474) |
|
248 |
+pathD[2,] <- c(-0.070161, 1, -0.552106, 0.212201, 0.147933) |
|
249 |
+pathD[3,] <- c(0.399038, -0.552106, 1, 0.44100, 0.64229) |
|
250 |
+pathD[4,] <- c(0.907127, 0.212201, 0.441007, 1, -1.035677) |
|
251 |
+colnames(pathD) <- c("LDL", "HDL", "TG", "TC", "path") |
|
252 |
+pathD<-as.data.frame(pathD) |
|
253 |
+pathD |
|
254 |
+ |
|
255 |
+pathdiagram2(pathD=pathD,pathO=mypath,rangeD=c(1:4),rangeO=c(1:3), |
|
256 |
+disease="CAD", R2D=0.536535,R2O=0.988243) |
|
257 |
+ |
|
258 |
+################################################### |
|
259 |
+# load SNP358 data |
|
260 |
+################################################### |
|
261 |
+ |
|
262 |
+data(SNP358.data) |
|
263 |
+SNP358 <- DataFrame(SNP358.data) |
|
264 |
+head(SNP358) |
|
265 |
+ |
|
266 |
+################################################### |
|
267 |
+# load package library(graphics) |
|
268 |
+################################################### |
|
269 |
+ |
|
270 |
+library(graphics) |
|
271 |
+ |
|
272 |
+################################################### |
|
273 |
+# chromosome SNP position histogram |
|
274 |
+########################################## |
|
275 |
+snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A") |
|
276 |
+ |
|
277 |
+ |
|
278 |
+################################################### |
|
279 |
+# SNP function annotation analysis |
|
280 |
+################################################### |
|
281 |
+ |
|
282 |
+################################################### |
|
283 |
+# load SNP annotation data: 431-434 |
|
284 |
+################################################### |
|
285 |
+ |
|
286 |
+data(SNP368annot.data) |
|
287 |
+SNP368<-DataFrame(SNP368annot.data) |
|
288 |
+SNP368[1:10, ] |
|
289 |
+ |
|
290 |
+################################################### |
|
291 |
+# load package plotrix |
|
292 |
+################################################### |
|
293 |
+ |
|
294 |
+################################################### |
|
295 |
+# make 3D Pie |
|
296 |
+############################################# |
|
297 |
+ucscannot(UCSCannot=SNP368,SNPn=368) |
|
298 |
+ |
|
299 |
+ucscannot(UCSCannot=SNP368,SNPn=368,A=3,B=2,C=1.3,method=2) |
|
300 |
+ |
|
301 |
+ |
|
302 |
+sessionInfo() |
|
303 |
+ |
|
304 |
+ |
... | ... |
@@ -7,7 +7,7 @@ |
7 | 7 |
\documentclass[a4paper]{article} |
8 | 8 |
|
9 | 9 |
\title{GWAS-based Mendelian Randomization Path Analysis} |
10 |
-\author{Yuan-De Tan and Dajiang Liu\ \ |
|
10 |
+\author{Yuan-De Tan \ \ |
|
11 | 11 |
\texttt{tanyuande@gmail.com}} |
12 | 12 |
|
13 | 13 |
<<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>= |
... | ... |
@@ -19,13 +19,13 @@ BiocStyle::latex() |
19 | 19 |
\maketitle |
20 | 20 |
|
21 | 21 |
\begin{abstract} |
22 |
-\Rpackage{GMRP} can perform analyses of Mendelian randomization (\emph{MR}),correlation, path of causal variables onto disease of interest and \emph{SNP} annotation analysis. \emph{MR} includes \emph{SNP} selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta vectors, \Rpackage{GMRP} performs correlation and path analyses to construct path diagrams of causal variables to the disease. \Rpackage{GMRP} consists of 8 $R$ functions: \Rfunction{chrp},\Rfunction{fmerg},\Rfunction{mktable},\Rfunction{pathdiagram},\Rfunction{pathdiagram2},\Rfunction{path},\Rfunction{snpposit},\Rfunction{ucscannot} and 5 datasets: \Robject{beta.data,cad.data,lpd.data,SNP358.data,SNP368annot.data}. \Rfunction{chrp} is used to separate string vector \texttt{hg19} into two numeric vectors: chromosome number and \emph{SNP} chromosome position. Function \Rfunction{fmerg} is used to merge two \emph{GWAS} result datasets into one dataset. Function \Rfunction{mktable} performs \emph{SNP} selection and creates a standard beta table for function \Rfunction{path} to do \emph{MR} and path analyses. Function \Rfunction{pathdiagram} is used to create a path diagram of causal variables onto the disease or onto outcome. Function \Rfunction{pathdiagram2} can merge two-level \emph{pathdiagrams} into one nested \emph{pathdiagram} where inner \emph{pathdiagram} is a \emph{pathdiagram} of causal variables contributing to outcome and the outside \emph{pathdiagram} is a path diagram of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \Robject{lpd.data} and \Robject{cad.data} provide an example to create a standard beta dataset for path function to do path analysis and \emph{SNP} data for \emph{SNP} annotation analysis by performing \Rfunction{mktable} and \Rfunction{fmerg}. \Robject{beta.data} are a standard beta dataset for path analysis. \Robject{SNP358.data} provide an example for \Rfunction{snpposit} to do \emph{SNP} position annotation analysis and \Robject{SNP368annot.data} are for \Rfunction{ucscannot} to do \emph{SNP} function annotation analysis. |
|
22 |
+\Rpackage{GMRP} can perform analyses of Mendelian randomization (\emph{MR}),correlation, path of causal variables onto disease of interest and \emph{SNP} annotation analysis. \emph{MR} includes \emph{SNP} selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta vectors, \Rpackage{GMRP} performs correlation and path analyses to construct path diagrams of causal variables to the disease. \Rpackage{GMRP} consists of 8 $R$ functions: \Rfunction{chrp},\Rfunction{fmerge},\Rfunction{mktable},\Rfunction{pathdiagram},\Rfunction{pathdiagram2},\Rfunction{path},\Rfunction{snpPositAnnot},\Rfunction{ucscannot} and 5 datasets: \Robject{beta.data,cad.data,lpd.data,SNP358.data,SNP368annot.data}. \Rfunction{chrp} is used to separate string vector \texttt{hg19} into two numeric vectors: chromosome number and \emph{SNP} chromosome position. Function \Rfunction{fmerge} is used to merge two \emph{GWAS} result datasets into one dataset. Function \Rfunction{mktable} performs \emph{SNP} selection and creates a standard beta table for function \Rfunction{path} to do \emph{MR} and path analyses. Function \Rfunction{pathdiagram} is used to create a path diagram of causal variables onto a given disease or onto outcome. Function \Rfunction{pathdiagram2} can merge two-level \emph{pathdiagrams} into one nested \emph{pathdiagram} where inner \emph{pathdiagram} is a \emph{pathdiagram} of causal variables contributing to outcome and the outside \emph{pathdiagram} is a path diagram of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \Robject{lpd.data} and \Robject{cad.data} provide an example to create a standard beta dataset for path function to do path analysis and \emph{SNP} data for \emph{SNP} annotation analysis by performing \Rfunction{mktable} and \Rfunction{fmerge}. \Robject{beta.data} are a standard beta dataset for path analysis. \Robject{SNP358.data} provide an example for \Rfunction{snpPositAnnot} to do \emph{SNP} position annotation analysis and \Robject{SNP368annot.data} are for \Rfunction{ucscannot} to perform \emph{SNP} function annotation analysis. |
|
23 | 23 |
\end{abstract} |
24 | 24 |
|
25 | 25 |
\tableofcontents |
26 | 26 |
|
27 | 27 |
\section{Introduction} |
28 |
-As an example of human disease, coronary artery disease (\emph{CAD}) is one of the causes leading to death and infirmity worldwide~\cite{Murray1997}. Low-density lipoprotein cholesterol (\emph{LDL}) and triglycerides (\emph{TG}) are viewed as risk factors causing \emph{CAD}. In epidemiological studies, plasma concentrations of increasing \emph{TG} and \emph{LDL} and deceasing \emph{HDL} have been observed to be associated with risk for \emph{CAD}~\cite{Angelantonio2009, Sarwar2007}. However, from observational studies, one could not directly infer that these cholesterol concentrations in plasma are risk factors causing \emph{CAD}~\cite{Do2013, Sarwar2007, Sheehan2007, Voight2012}. A big limitation of observational studies is to difficultly distinguish between causal and spurious associations due to confounding~\cite{Pichler2013}. An efficient approach to overcome this limitation is \textit{Mendelian Randomization} (\emph{MR}) analysis ~\cite{Sheehan2007, Smith2003} where genetic variants are used as instrumental variables. For this reason, many investigators tried to use genetic variants to assess causality and estimate the causal effects on the diseases. |
|
28 |
+As an example of human disease, coronary artery disease (\emph{CAD}) is one of the causes leading to death and infirmity worldwide~\cite{Murray1997}. Low-density lipoprotein cholesterol (\emph{LDL}) and triglycerides (\emph{TG}) are viewed as risk factors causing \emph{CAD}. In epidemiological studies, plasma concentrations of increasing \emph{TG} and \emph{LDL} and deceasing high-density lipoprotein cholesterol (\emph{HDL} ) have been observed to be associated with risk for \emph{CAD}~\cite{Angelantonio2009, Sarwar2007}. However, from observational studies, one could not directly infer that these cholesterol concentrations in plasma are risk factors causing \emph{CAD}~\cite{Do2013, Sarwar2007, Sheehan2007, Voight2012}. A big limitation of observational studies is to difficultly distinguish between causal and spurious associations due to confounding~\cite{Pichler2013}. An efficient approach to overcome this limitation is \textit{Mendelian Randomization} (\emph{MR}) analysis ~\cite{Sheehan2007, Smith2003} where genetic variants are used as instrumental variables. For this reason, many investigators tried to use genetic variants to assess causality and estimate the causal effects on the diseases. |
|
29 | 29 |
|
30 | 30 |
\emph{MR} analysis can perfectly exclude confounding factors associated with disease. However, when we expand one causal variable to many, \emph{MR} analysis becomes challenged and complicated because the genetic variant would have additional effects on the other risk factors, which violate assumption of no pleiotropy. An unknown genetic variant in \emph{MR} analysis possibly provides a false instrument for causal effect assessment of risk factors on the disease. The reason is that if this genetic variant is in \emph{LD} with another gene that is not used but has effect on the disease of study~\cite{Sheehan2007, Sheehan2010}. It then violates the third assumption. These two problems can be addressed by using multiple instrumental variables. For this reason, Do \emph{et al} (2013)developed statistic approach to address this issue~\cite{Do2013}. However, method of Do \emph{et al} ~\cite{Do2013} cannot disentangle correlation effects among the multiple undefined risk factors on the disease of study. The beta values obtained from regression analyses are not direct causal effects because their effects are entangled with correlations among these undefined risk factors. |
31 | 31 |
|
... | ... |
@@ -52,7 +52,7 @@ library(GMRP) |
52 | 52 |
|
53 | 53 |
\Robject{lpd.data} was a subset (1069 SNPs) of four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC}. These \emph{GWAS} result data sheets were downloaded from the website\footnote{\url{http://csg.sph.umich.edu//abecasis/public/lipids2013/} } where there are 120165 SNPs scattered on 23 chromosomes and 40 variables. Four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC} were merged into one data sheet by using \Rfunction{fmerg}\textit{(fl1,fl2,ID1,ID2,A,B,method)} where \textit{fl1} and \textit{fl2} are two \emph{GWAS} result data sheets. $ID1$ and $ID2$ are key $id$ in files \textit{fl1} and \textit{fl2}, respectively, and required. $A$ and $B$ are postfix for \textit{fl1} and \textit{fl2}. Default values are $A$="" and $B$="". \textit{method} is method for merging . In the current version, there are four methods: \textit{method}="No" or "no" or "NO" or "N" or "n" means that the data with unmatched \emph{SNP}s in \textit{file1} and \textit{file 2} are not saved in the merged file; \textit{method}="ALL" or "All" or "all" or "A" or "a" indicates that the data with all unmatched \emph{SNP}s in \textit{file 1} and \textit{file 2} are saved in the unpaired way in the merged data file; if \textit{method}="\textit{file1}", then those with unmatched \emph{SNP}s only from file1 are saved or if \textit{method}="\textit{file2}", \Rfunction{fmerg} will save the data with unmatched \emph{SNP}s only from \textit{file2}". Here is a simple example: |
54 | 54 |
|
55 |
-<<fmerg,keep.source=TRUE, eval=FALSE>>= |
|
55 |
+<<fmerge,keep.source=TRUE, eval=FALSE>>= |
|
56 | 56 |
data1 <- matrix(NA, 20, 4) |
57 | 57 |
data2 <- matrix(NA, 30, 7) |
58 | 58 |
SNPID1 <- paste("rs", seq(1:20), sep="") |
... | ... |
@@ -68,15 +68,15 @@ colnames(data1) <- c("SNP", "var1", "var2", "var3", "var4") |
68 | 68 |
colnames(data2) <- c("SNP", "var1", "var2", "var3", "var4", "V1", "V2", "V3") |
69 | 69 |
data1<-DataFrame(data1) |
70 | 70 |
data2<-DataFrame(data2) |
71 |
-data12 <- fmerg(fl1=data1, fl2=data2, ID1="SNP", ID2="SNP", A=".dat1", B=".dat2", method="No") |
|
71 |
+data12 <- fmerge(fl1=data1, fl2=data2, ID1="SNP", ID2="SNP", A=".dat1", B=".dat2", method="No") |
|
72 | 72 |
@ |
73 | 73 |
|
74 | 74 |
User can take the following approach to merge all four lipid files into a data sheet: |
75 |
-\textit{LDL\underline{}HDL<-fmerg(fl1=LDL\underline{}file,fl2=HDL\underline{}file, ID1="SNP", ID2="SNP", A=".LDL", B=".HDL", method="No")} |
|
75 |
+\textit{LDL\underline{}HDL<-fmerge(fl1=LDL\underline{}file,fl2=HDL\underline{}file, ID1="SNP", ID2="SNP", A=".LDL", B=".HDL", method="No")} |
|
76 | 76 |
|
77 |
-\textit{TG\underline{}TC<-fmerg(fl1=TG\underline{}file,fl2=TC\underline{}file, ID1="SNP", ID2="SNP", A=".TG", B=".TC", method="No")} |
|
77 |
+\textit{TG\underline{}TC<-fmerge(fl1=TG\underline{}file,fl2=TC\underline{}file, ID1="SNP", ID2="SNP", A=".TG", B=".TC", method="No")} |
|
78 | 78 |
|
79 |
-\textit{lpd<-fmerg(fl1=LDL\underline{}HDL,fl2=TG\underline{}TC,ID1="SNP",ID2="SNP",A="",B="",method="No")} |
|
79 |
+\textit{lpd<-fmerge(fl1=LDL\underline{}HDL,fl2=TG\underline{}TC,ID1="SNP",ID2="SNP",A="",B="",method="No")} |
|
80 | 80 |
|
81 | 81 |
\Robject{cad.data} was also a subset (1069 SNPs) of original GWAS meta-analyzed dataset that was downloaded from the website\footnote{\url{http://www.cardiogramplusc4d.org/downloads/} } and contains 2420360 \emph{SNP}s and 12 variables. |
82 | 82 |
|
... | ... |
@@ -86,15 +86,17 @@ User can take the following approach to merge all four lipid files into a data s |
86 | 86 |
|
87 | 87 |
\Robject{SNP368annot.data} is the data obtained from function analysis with \href{http://snp-nexus.org/index.html}{\emph{SNP} Annotation Tool} and provides example of performing function \Rfunction{ucscanno} to draw a \texttt{3D} pie and output the results of proportions of \emph{SNP}s coming from gene function various elements. |
88 | 88 |
|
89 |
- <<>>= |
|
89 |
+<<>>= |
|
90 | 90 |
data(cad.data) |
91 |
-cad <- DataFrame(cad.data) |
|
91 |
+#cad <- DataFrame(cad.data) |
|
92 |
+cad<-cad.data |
|
92 | 93 |
head(cad) |
93 | 94 |
@ |
94 | 95 |
|
95 | 96 |
<<>>= |
96 | 97 |
data(lpd.data) |
97 |
-lpd <- DataFrame(lpd.data) |
|
98 |
+#lpd <- DataFrame(lpd.data) |
|
99 |
+lpd<-lpd.data |
|
98 | 100 |
head(lpd) |
99 | 101 |
@ |
100 | 102 |
|
... | ... |
@@ -406,7 +408,7 @@ With SNP data \Robject{SNP358}, we can perform \emph{SNP} position annotation us |
406 | 408 |
\textit{maxd} is a numeric parameter that is maximum distance for truncating chromosome columns. If there are not big differences among 23 chromosomes, then \textit{maxd} can be set to be larger than 2000$kbp$. Its default is 2000$kbp$. |
407 | 409 |
|
408 | 410 |
<<fig=FALSE,keep.source=TRUE, label=ChromHistogram>>= |
409 |
-snpposit(SNPdata=SNP358,SNP_hg19="chr",LG=10,main="A",maxd=2000) |
|
411 |
+snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A") |
|
410 | 412 |
@ |
411 | 413 |
\begin{figure}[ht] |
412 | 414 |
\begin{center} |
... | ... |
@@ -457,7 +459,7 @@ ucscannot(UCSCannot=SNP368,SNPn=368) |
457 | 459 |
|
458 | 460 |
<<label=figPIE3D1, fig=TRUE,echo=FALSE>>= |
459 | 461 |
<<PIE3D1>> |
460 |
-@ v |
|
462 |
+@ |
|
461 | 463 |
\caption{ Distribution of the selected \emph{SNP}s in gene function elements.} |
462 | 464 |
\label{figure5} |
463 | 465 |
\end{center} |