Browse code

correct GMRP.Rnw error

y.tan authored on 26/05/2018 15:59:21
Showing 2 changed files

... ...
@@ -8,8 +8,8 @@ BiocStyle::latex()
8 8
 #library("knitr")
9 9
 #opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide",
10 10
  #              fig.width=4,fig.height=4.5,
11
- #             dpi=300,# increase dpi to avoid pixelised pngs
12
- #             message=FALSE)
11
+  #             dpi=300,# increase dpi to avoid pixelised pngs
12
+  #             message=FALSE)
13 13
                
14 14
 ###################################################
15 15
 ### code chunk number 2: GMRP.Rnw:40-43
... ...
@@ -46,18 +46,15 @@ data12 <- fmerge(fl1=data1, fl2=data2, ID1="SNP", ID2="SNP", A=".dat1", B=".dat2
46 46
 ###################################################
47 47
 ### code chunk number 5: disease data load:90-93
48 48
 ###################################################
49
-
50 49
 data(cad.data)
51
-cad <- DataFrame(cad.data)
50
+#cad <- DataFrame(cad.data)
52 51
 cad<-cad.data
53 52
 head(cad)
54
-
55 53
 ###################################################
56 54
 ### code chunk number 6:lipid data load: 95-99
57 55
 ###################################################
58
-
59 56
 data(lpd.data)
60
-lpd <- DataFrame(lpd.data)
57
+#lpd <- DataFrame(lpd.data)
61 58
 lpd<-lpd.data
62 59
 head(lpd)
63 60
 
... ...
@@ -84,7 +81,7 @@ beta.LDL <- lpd$beta.LDL
84 81
 beta.HDL <- lpd$beta.HDL
85 82
 beta.TG <- lpd$beta.TG
86 83
 beta.TC <- lpd$beta.TC
87
-beta <- cbind(beta.LDL,beta.HDL,beta.TG,beta.TC)
84
+beta <- cbind(beta.LDL, beta.HDL, beta.TG, beta.TC)
88 85
 
89 86
 ###################################################
90 87
 #     Step3: construct a matrix for allele1: 148-154
... ...
@@ -94,7 +91,7 @@ a1.LDL <- lpd$A1.LDL
94 91
 a1.HDL <- lpd$A1.HDL
95 92
 a1.TG <- lpd$A1.TG
96 93
 a1.TC <- lpd$A1.TC
97
-alle1 <- cbind(a1.LDL,a1.HDL,a1.TG,a1.TC)
94
+alle1 <- cbind(a1.LDL, a1.HDL, a1.TG, a1.TC)
98 95
 
99 96
 ###################################################
100 97
 # Step4:  calculate pcj:157-165
... ...
@@ -104,7 +101,7 @@ N.LDL <- lpd$N.LDL
104 101
 N.HDL <- lpd$N.HDL
105 102
 N.TG <- lpd$N.TG
106 103
 N.TC <- lpd$N.TC
107
-ss <- cbind(N.LDL,N.HDL,N.TG,N.TC)
104
+ss <- cbind(N.LDL, N.HDL, N.TG, N.TC)
108 105
 sm <- apply(ss,1,sum)
109 106
 pcj <- round(sm/max(sm), 6)
110 107
 
... ...
@@ -131,15 +128,13 @@ sd <- cbind(sd.LDL, sd.HDL, sd.TG, sd.TC)
131 128
 ###################################################
132 129
 # Step7: SNPID and position: 186-189
133 130
 ###################################################
134
-
135
-#<<Step7, keep.source=TRUE, eval=FALSE>>=
131
+<<Step7, keep.source=TRUE, eval=FALSE>>=
136 132
 hg19 <- lpd$SNP_hg19.HDL
137 133
 rsid <- lpd$rsid.HDL
138 134
 
139 135
 ###################################################
140 136
 # Step8: separate chromosome number and SNP position: 192-194
141 137
 ###################################################
142
-
143 138
 chr<-chrp(hg=hg19)
144 139
 
145 140
 ###################################################
... ...
@@ -151,9 +146,9 @@ newdata<-cbind(chr,rsid,alle1,as.data.frame(newdata))
151 146
 dim(newdata)
152 147
 
153 148
 
154
-###########################################################
149
+###################################################
155 150
 # Step10: retrieve data from cad and calculate pdj:204-215
156
-###########################################################
151
+###################################################
157 152
 
158 153
 hg18.d <- cad$chr_pos_b36
159 154
 SNP.d <- cad$SNP #SNPID
... ...
@@ -166,23 +161,22 @@ N.ctr <- cad$N_control
166 161
 N.d <- N.case+N.ctr
167 162
 freq.case <- N.case/N.d
168 163
 
169
-###################################################################
164
+###################################################
170 165
  # Step11: combine these cad variables into new data sheet:218-222
171
- ##################################################################
166
+ ###################################################
172 167
 
173 168
 newcad <- cbind(freq.d, beta.d, N.case, N.ctr, freq.case)
174 169
 newcad <- cbind(hg18.d, SNP.d, a1.d, as.data.frame(newcad))
175 170
 dim(newcad)
176 171
 
177
-########################################################## 
172
+################################################### 
178 173
 #Step12: give name vector of causal variables: 225-227
179
-##########################################################
174
+###################################################
180 175
 varname <-c("CAD", "LDL", "HDL", "TG", "TC")
181 176
 
182 177
 ###################################################
183 178
 ### Step 13: choose SNPs and make beta table
184 179
 ###################################################
185
-
186 180
 mybeta <- mktable(cdata=newdata, ddata=newcad, rt="beta", 
187 181
 varname=varname, LG=1, Pv=0.00000005, Pc=0.979, Pd=0.979)
188 182
 dim(mybeta)
... ...
@@ -194,7 +188,6 @@ head(beta)
194 188
 ###################################################
195 189
 ### load beta data: 256-264
196 190
 ###################################################
197
-
198 191
 data(beta.data)
199 192
 beta.data<-DataFrame(beta.data)
200 193
 CAD <- beta.data$cad
... ...
@@ -206,7 +199,6 @@ TC <- beta.data$tc
206 199
 ###################################################
207 200
 ### two way scatter: 266-280
208 201
 ###################################################
209
-
210 202
 par(mfrow=c(2, 2), mar=c(5.1, 4.1, 4.1, 2.1), oma=c(0, 0, 0, 0))
211 203
 plot(LDL,CAD, pch=19, col="blue", xlab="beta of SNPs on LDL", 
212 204
 ylab="beta of SNP on CAD", cex.lab=1.5, cex.axis=1.5, cex.main=2)
... ...
@@ -224,7 +216,6 @@ abline(lm(CAD~TC), col="red", lwd=2)
224 216
 ###################################################
225 217
 ### MR and Path Analysis: 296-300
226 218
 ###################################################
227
-
228 219
 data(beta.data)
229 220
 mybeta <- DataFrame(beta.data)
230 221
 mod <- CAD~LDL+HDL+TG+TC
... ...
@@ -245,13 +236,11 @@ mypath
245 236
 ###################################################
246 237
 ### load package: diagram: 324-326
247 238
 ###################################################
248
-
249 239
 library(diagram)
250 240
 
251 241
 ###################################################
252 242
 # make path diagram
253 243
 ###################################################
254
-
255 244
 pathdiagram(pathdata=mypath, disease="CAD", R2=0.988243, range=c(1:3))
256 245
 
257 246
 pathD<-matrix(NA,4,5)
... ...
@@ -283,9 +272,9 @@ library(graphics)
283 272
 ###################################################
284 273
 # chromosome SNP position histogram
285 274
 ##########################################
286
-
287 275
 snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A")
288 276
 
277
+
289 278
 ###################################################
290 279
 # SNP function annotation analysis
291 280
 ###################################################
... ...
@@ -305,7 +294,6 @@ SNP368[1:10, ]
305 294
 ###################################################
306 295
 # make 3D Pie
307 296
 #############################################
308
-
309 297
 ucscannot(UCSCannot=SNP368,SNPn=368)
310 298
 
311 299
 ucscannot(UCSCannot=SNP368,SNPn=368,A=3,B=2,C=1.3,method=2)
... ...
@@ -4,22 +4,35 @@
4 4
 % To compile this document
5 5
 % library('cacheSweave');rm(list=ls());Sweave('GMRP.Rnw',driver=cacheSweaveDriver());system("pdflatex GMRP")
6 6
 
7
-\documentclass[a4paper]{article}
7
+\documentclass{article}
8
+
9
+%\usepackage[authoryear,round]{natbib}
10
+ 
11
+<<style, echo=FALSE, results=tex>>=
12
+BiocStyle::latex(use.unsrturl=FALSE)
13
+@
8 14
 
9 15
 \title{GWAS-based Mendelian Randomization Path Analysis}
10 16
 \author{Yuan-De Tan \\
11 17
 \texttt{tanyuande@gmail.com}}
12 18
 
13
-<<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>=
14
-BiocStyle::latex()
15
-@ 
19
+%<<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>=
20
+%BiocStyle::latex()
21
+%@ 
16 22
 
17 23
 \begin{document}
18 24
 
19 25
 \maketitle
20 26
 
27
+<<options, echo=FALSE>>=
28
+options(width=72)
29
+options(showHeadLines=3)
30
+options(showTailLines=3)
31
+@
32
+
21 33
 \begin{abstract}
22
-\Rpackage{GMRP} can perform analyses of Mendelian randomization (\emph{MR}),correlation, path of causal variables onto disease of study and \emph{SNP} annotation analysis. \emph{MR} includes \emph{SNP} selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta vectors, \Rpackage{GMRP} performs correlation and path analyses to construct  path diagrams of causal variables to the disease. \Rpackage{GMRP} consists of 8 \emph{R} functions: \Rfunction{chrp},\Rfunction{fmerge},\Rfunction{mktable},\Rfunction{pathdiagram},\Rfunction{pathdiagram2},\Rfunction{path},\Rfunction{snpPositAnnot},\Rfunction{ucscannot} and 5 datasets: \Robject{beta.data,cad.data,lpd.data,SNP358.data,SNP368annot.data}. \Rfunction{chrp} is used to separate string vector \texttt{hg19} into two numeric vectors: chromosome number and \emph{SNP} chromosome position. Function \Rfunction{fmerge} is used to merge two \emph{GWAS} result datasets into one dataset. Function \Rfunction{mktable} performs \emph{SNP} selection and creates a standard beta table for function \Rfunction{path} to do \emph{MR} and path analyses. Function \Rfunction{pathdiagram} is used to create a path diagram of causal variables onto a given disease or onto outcome. Function \Rfunction{pathdiagram2} can merge two-level \emph{pathdiagrams} into one nested \emph{pathdiagram} where inner \emph{pathdiagram} is a \emph{pathdiagram} of causal variables contributing to outcome and the outside \emph{pathdiagram} is a path diagram of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \Robject{lpd.data} and \Robject{cad.data} provide an example to create a standard beta dataset for path function to do path analysis and \emph{SNP} data for \emph{SNP} annotation analysis by performing \Rfunction{mktable} and \Rfunction{fmerge}. \Robject{beta.data} are a standard beta dataset for path analysis. \Robject{SNP358.data} provide an example for \Rfunction{snpPositAnnot} to do \emph{SNP} position annotation analysis and \Robject{SNP368annot.data} are for \Rfunction{ucscannot} to perform \emph{SNP} function annotation analysis.
34
+\Rpackage{GMRP} can perform analyses of Mendelian randomization (\emph{MR}),correlation, path of causal variables onto disease of interest and \emph{SNP} annotation analysis. \emph{MR} includes \emph{SNP} selection with given criteria and regression analysis of causal variables on the disease to generate beta values of causal variables on the disease. Using the beta vectors, \Rpackage{GMRP} performs correlation and path analyses to construct  path diagrams of causal variables to the
35
+disease. \Rpackage{GMRP} consists of 8 \emph{R} functions: \Rfunction{chrp},\Rfunction{fmerge},\Rfunction{mktable},\Rfunction{pathdiagram},\Rfunction{pathdiagram2},\Rfunction{path},\Rfunction{snpPositAnnot},\Rfunction{ucscannot} and 5 datasets: \Robject{beta.data,cad.data,lpd.data,SNP358.data,SNP368annot.data}. \Rfunction{chrp} is used to separate string vector \texttt{hg19} into two numeric vectors: chromosome number and \emph{SNP} chromosome position. Function \Rfunction{fmerge} is used to merge two \emph{GWAS} result datasets into one dataset. Function \Rfunction{mktable} performs \emph{SNP} selection and creates a standard beta table for function \Rfunction{path} to do \emph{MR} and path analyses. Function \Rfunction{pathdiagram} is used to create a path diagram of causal variables onto a given disease or onto outcome. Function \Rfunction{pathdiagram2} can merge two-level \emph{pathdiagrams} into one nested \emph{pathdiagram} where inner \emph{pathdiagram} is a \emph{pathdiagram} of causal variables contributing to outcome and the outside \emph{pathdiagram} is a path diagram of causal variables including outcome onto the disease. The five datasets provide examples for running these functions. \Robject{lpd.data} and \Robject{cad.data} provide an example to create a standard beta dataset for path function to do path analysis and \emph{SNP} data for \emph{SNP} annotation analysis by performing \Rfunction{mktable} and \Rfunction{fmerge}. \Robject{beta.data} are a standard beta dataset for path analysis. \Robject{SNP358.data} provide an example for \Rfunction{snpPositAnnot} to do \emph{SNP} position annotation analysis and \Robject{SNP368annot.data} are for \Rfunction{ucscannot} to perform \emph{SNP} function annotation analysis.
23 36
 \end{abstract}
24 37
 
25 38
 \tableofcontents
... ...
@@ -27,14 +40,13 @@ BiocStyle::latex()
27 40
 \section{Introduction}
28 41
 As an example of human disease, coronary artery disease (\emph{CAD}) is one of the causes leading to death and infirmity worldwide~\cite{Murray1997}. Low-density lipoprotein cholesterol (\emph{LDL}) and triglycerides (\emph{TG}) are viewed as risk factors causing \emph{CAD}.  In epidemiological studies, plasma concentrations of increasing \emph{TG} and \emph{LDL} and deceasing high-density lipoprotein cholesterol (\emph{HDL} ) have been observed to be associated with risk for \emph{CAD}~\cite{Angelantonio2009, Sarwar2007}. However, from observational studies, one could not directly infer that these cholesterol concentrations  in plasma are risk factors causing \emph{CAD}~\cite{Do2013, Sarwar2007, Sheehan2007, Voight2012}. A big limitation of observational studies is to difficultly distinguish between causal and spurious associations due to confounding~\cite{Pichler2013}. An efficient approach to overcome this limitation is \textit{Mendelian Randomization} (\emph{MR}) analysis ~\cite{Sheehan2007, Smith2003} where genetic variants are used as instrumental variables. For this reason, many investigators tried to use genetic variants to assess causality and estimate the causal effects on the diseases.    
29 42
 
30
-\emph{MR} analysis can perfectly exclude confounding factors associated with disease.  However, when we expand one causal variable to many, \emph{MR} analysis becomes challenged and complicated because the genetic variant would have additional effects on the other risk factors, which violate assumption of no pleiotropy. An unknown genetic variant in \emph{MR} analysis possibly provides a false instrument for causal effect assessment of risk factors on the disease. The reason is that
31
-if this genetic variant is in \emph{LD} with another gene that is not used but has effect on the disease of study~\cite{Sheehan2007, Sheehan2010}. It then violates the third assumption.  These two problems can be addressed by using multiple instrumental variables.  For this reason, Do \emph{et al}(2013)developed statistic approach to address this issue~\cite{Do2013}. However, the method of Do \emph{et al} ~\cite{Do2013} cannot disentangle correlation effects among the multiple undefined risk factors on the disease of study.  The beta values obtained from regression analyses are not direct causal effects because their effects are entangled with correlations among these undefined risk factors. 
43
+\emph{MR} analysis can perfectly exclude confounding factors associated with disease.  However, when we expand one causal variable to many, \emph{MR} analysis becomes challenged and complicated because the genetic variant would have additional effects on the other risk factors, which violate assumption of no pleiotropy. An unknown genetic variant in \emph{MR} analysis possibly provides a false instrument for causal effect assessment of risk factors on the disease. The reason is that if this genetic variant is in \emph{LD} with another gene that is not used but has effect on the disease of study~\cite{Sheehan2007, Sheehan2010}. It then violates the third assumption.  These two problems can be addressed by using multiple instrumental variables.  For this reason, Do \emph{et al} (2013)developed statistic approach to address this issue~\cite{Do2013}. However, method of Do \emph{et al} ~\cite{Do2013} cannot disentangle correlation effects among the multiple undefined risk factors on the disease of study.  The beta values obtained from regression analyses are not direct causal effects because their effects are entangled with correlations among these undefined risk factors. 
32 44
 
33 45
 The best way to address the entanglement of multiple causal effects is path analysis that was developed by Wright~\cite{Wright1921, Wright1934}. This is because path analysis can dissect beta values into direct and indirect effects of causal variables on the disease. However, path analysis has not broadly been applied to diseases because diseases are usually binary variable. The method of Do ~\cite{Do2013} makes it possible to apply path analysis to disentangle causal effects of undefined risk factors on diseases. For doing so, we here provide \textbf{R package} \Rpackage{GMRP} (\emph{GWAS}-based \emph{MR} and \emph{path analysis}) to solve the above issues.
34 46
 
35 47
 This vignette is intended to give a rapid introduction to the commands used in implementing \emph{MR} analysis, regression analysis, and path analysis, including \emph{SNP} annotation and chromosomal position analysis by means of the \Rpackage{GMRP} package. 
36 48
 
37
-We assume that user has the \emph{GWAS} result data from \emph{GWAS} analysis or \emph{GWAS} meta analysis of \emph{SNP}s associated with risk or confounding factors and a disease of study. If all studied causal variables of \emph{GWAS} data are separately saved in different sheet files, then files are assumed to have the same sheet format and they are required to be merged by using function \Rfunction{fmerg} into one sheet file without disease \emph{GWAS} data. After a standard beta table is created with \Rfunction{mktable}, user can use function \Rfunction{path} to perform \emph{RM} and path analyses.  Using the result of path analysis, user can draw path \Rfunction{plot}\textit{(pathdiagram)} with functions \Rfunction{pathdiagram} and \Rfunction{pathdiagram2}. These will be introduced in detail in the following examples.
49
+We assume that user has the \emph{GWAS} result data from \emph{GWAS} analysis or \emph{GWAS} meta analysis of \emph{SNP}s associated with risk or confounding factors and a disease of study. If all studied causal variables of \emph{GWAS} data are separately saved in different sheet files, then files are assumed to have the same sheet format and they are required to be merged by using function \Rfunction{fmerge} into one sheet file without disease \emph{GWAS} data. After a standard beta table is created with \Rfunction{mktable}, user can use function \Rfunction{path} to perform \emph{RM} and path analyses.  Using the result of path analysis, user can draw path \Rfunction{plot}\textit{(pathdiagram)} with functions \Rfunction{pathdiagram} and \Rfunction{pathdiagram2}. These will be introduced in detail in the following examples.
38 50
 
39 51
 We begin by loading the \Rpackage{GMRP} package.
40 52
 
... ...
@@ -49,9 +61,11 @@ library(GMRP)
49 61
 
50 62
 
51 63
 \section{Loading Data}
52
-\Rpackage{GMRP} provides five data files:   \Robject{beta.data}, \Robject{cad.data}, \Robject{lpd.data}, \Robject{SNP358.data} and \Robject{SNP368annot.data} where
64
+\Rpackage{GMRP} provides five data files:\Robject{beta.data}, \Robject{cad.data}, \Robject{lpd.data}, \Robject{SNP358.data} and \Robject{SNP368annot.data} where
53 65
 
54
-\Robject{lpd.data} was a subset (1069 SNPs) of four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC}. These \emph{GWAS} result data sheets were downloaded from the website\footnote{\url{http://csg.sph.umich.edu//abecasis/public/lipids2013/} } where there are 120165 SNPs scattered on 23 chromosomes and 40 variables. Four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC} were merged into one data sheet by using \Rfunction{fmerg}\textit{(fl1,fl2,ID1,ID2,A,B,method)} where \textit{fl1} and \textit{fl2} are two \emph{GWAS} result data sheets. $ID1$ and $ID2$ are key $id$ in files \textit{fl1} and \textit{fl2}, respectively, and required. $A$ and $B$ are postfix for \textit{fl1} and \textit{fl2}. Default values are $A$="" and $B$="". \textit{method} is method for merging . In the current version, there are four methods: \textit{method}="No" or "no" or "NO" or "N" or "n" means that  the data with unmatched \emph{SNP}s in \textit{file1} and \textit{file 2} are not saved in the merged file; \textit{method}="ALL" or "All" or "all" or "A" or "a" indicates that the data with all unmatched \emph{SNP}s in \textit{file 1} and \textit{file 2} are saved in the unpaired way in the merged data file; if \textit{method}="\textit{file1}", then those with unmatched \emph{SNP}s only from file1 are saved or if \textit{method}="\textit{file2}", \Rfunction{fmerg} will save the data with unmatched \emph{SNP}s only from \textit{file2}". Here is a simple example:
66
+\Robject{lpd.data} was a subset (1069 SNPs) of four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC}. These \emph{GWAS} result data sheets were downloaded from the website\footnote{\url{http://csg.sph.umich.edu//abecasis/public/lipids2013/}} where there are 120165 SNPs on 23 chromosomes and 40 variables. Four GWAS result datasets for \emph{LDL}, \emph{HDL}, \emph{TG} and \emph{TC} were merged into one data sheet by
67
+using\Rfunction{fmerge}\textit{(fl1,fl2,ID1,ID2,A,B,method)} where \textit{fl1} and \textit{fl2} are two \emph{GWAS} result data sheets. $ID1$ and $ID2$ are key $id$ in files \textit{fl1} and \textit{fl2}, respectively, and required. $A$ and $B$ are respectily postfix for \textit{fl1} and \textit{fl2}. Default values are $A$="" and $B$="". \textit{method} is method for merging . In the current version, there are four methods: \textit{method}="No" or "no" or "NO" or "N" or "n" means that
68
+the data with unmatched \emph{SNP}s in \textit{file1} and \textit{file 2} are not saved in the merged file; \textit{method}="ALL" or "All" or "all" or "A" or "a" indicates that the data with all unmatched \emph{SNP}s in \textit{file 1} and \textit{file 2} are saved in the unpaired way in the merged data file; If \textit{method}=\textit{"file1"}, then those with unmatched \emph{SNP}s only from file1 are saved or if \textit{method}=\textit{"file2"}, \Rfunction{fmerge} will save the data with unmatched \emph{SNP}s only from \textit{file2}". Here is a simple example:
55 69
 
56 70
 <<fmerge,keep.source=TRUE, eval=FALSE>>=
57 71
 data1 <- matrix(NA, 20, 4)
... ...
@@ -79,24 +93,24 @@ User can take the following approach to merge all four lipid files into a data s
79 93
 
80 94
 \textit{lpd<-fmerge(fl1=LDL\underline{}HDL,fl2=TG\underline{}TC,ID1="SNP",ID2="SNP",A="",B="",method="No")}
81 95
 
82
-\Robject{cad.data} was also a subset (1069 SNPs) of original GWAS meta-analyzed dataset that was downloaded from the website\footnote{\url{http://www.cardiogramplusc4d.org/downloads/} } and contains 2420360 \emph{SNP}s and 12 variables. 
96
+\Robject{cad.data} was also a subset (1069 SNPs) of original GWAS meta-analyzed dataset that was downloaded from the website\footnote{\url{http://www.cardiogramplusc4d.org/downloads/}} and contains 2420360 \emph{SNP}s and 12 variables. 
83 97
 
84
-\Robject{beta.data}  that was created by using function \Rfunction{mktable} and \Rfunction{fmerg} from \Robject{lpd.data} and \Robject{cad.data} is a standard beta table for \emph{MR} and path analyses.
98
+\Robject{beta.data}  that was created by using function \Rfunction{mktable} and \Rfunction{fmerge} from \Robject{lpd.data} and \Robject{cad.data} is a standard beta table for \emph{MR} and path analyses.
85 99
  
86 100
 \Robject{SNP358.data} contains 358 \emph{SNP}s selected by \Rfunction{mktable} for \emph{SNP} position annotation analysis.
87 101
 
88
-\Robject{SNP368annot.data} is the data obtained from function analysis with \href{http://snp-nexus.org/index.html}{\emph{SNP} Annotation Tool} and provides example of performing function \Rfunction{ucscanno} to draw a \texttt{3D} pie and output the results of proportions of \emph{SNP}s coming from gene function various elements. 
102
+\Robject{SNP368annot.data} is the data obtained from function analysis with \url{http://snp-nexus.org/index.html}{\emph{SNP} Annotation Tool} and provides example of performing function \Rfunction{ucscanno} to draw a \texttt{3D} pie and output the results of proportions of \emph{SNP}s coming from gene function various elements. 
89 103
   
90 104
 <<>>=
91 105
 data(cad.data)
92
-cad <- DataFrame(cad.data)
106
+#cad <- DataFrame(cad.data)
93 107
 cad<-cad.data
94 108
 head(cad)
95 109
 @
96 110
 
97 111
 <<>>=
98 112
 data(lpd.data)
99
-lpd <- DataFrame(lpd.data)
113
+#lpd <- DataFrame(lpd.data)
100 114
 lpd<-lpd.data
101 115
 head(lpd)
102 116
 @
... ...
@@ -127,7 +141,7 @@ We use function \Rfunction{mktable} to choose \emph{SNP}s and make a standard be
127 141
  
128 142
 The standard beta table will be created via 15 steps:
129 143
 
130
-Step1: calculate $pv_j$
144
+Step1: calculate $pvj$
131 145
 
132 146
 <<Step1,keep.source=TRUE, eval=FALSE>>=
133 147
 pvalue.LDL <- lpd$P.value.LDL
... ...
@@ -156,7 +170,7 @@ a1.TC <- lpd$A1.TC
156 170
 alle1 <- cbind(a1.LDL, a1.HDL, a1.TG, a1.TC)
157 171
 @
158 172
 
159
-Step4: give sample sizes of causal variables and calculate $pc_j$
173
+Step4: give sample sizes of causal variables and calculate $pcj$
160 174
 <<Step4, keep.source=TRUE, eval=FALSE>>=
161 175
 N.LDL <- lpd$N.LDL
162 176
 N.HDL <- lpd$N.HDL
... ...
@@ -176,7 +190,7 @@ freq.TC<-lpd$Freq.A1.1000G.EUR.TC
176 190
 freq<-cbind(freq.LDL,freq.HDL,freq.TG,freq.TC)
177 191
 @
178 192
  
179
-Step6: construct matrix for $sd$ of each causal variable (here $sd$ is not specific to $SNP_j$). The following $sd$ values for \emph{LDL, HDL,TG} and \emph{TC} were means of standard deviations of these lipoprotein concentrations in plasma over 63 studies from Willer \emph{et al}~\cite{Willer2013}. 
193
+Step6: construct matrix for $sd$ of each causal variable (here $sd$ is not specific to \emph{SNP} $j$). The following $sd$ values for \emph{LDL, HDL,TG} and \emph{TC} were means of standard deviations of these lipoprotein concentrations in plasma over 63 studies from Willer \emph{et al}~\cite{Willer2013}. 
180 194
 <<Step6, keep.source=TRUE, eval=FALSE>>=
181 195
 sd.LDL <- rep(37.42, length(pvj))
182 196
 sd.HDL <- rep(14.87, length(pvj))
... ...
@@ -185,7 +199,7 @@ sd.TC <- rep(42.74, length(pvj))
185 199
 sd <- cbind(sd.LDL, sd.HDL, sd.TG, sd.TC)
186 200
 @
187 201
 
188
-Step7:  \emph{SNPID} and position:
202
+Step7:  \emph{SNPID} and position are retrieved from \Robject{lpd} data:
189 203
 <<Step7, keep.source=TRUE, eval=FALSE>>=
190 204
 hg19 <- lpd$SNP_hg19.HDL
191 205
 rsid <- lpd$rsid.HDL
... ...
@@ -203,7 +217,7 @@ newdata<-cbind(chr,rsid,alle1,as.data.frame(newdata))
203 217
 dim(newdata)
204 218
 @
205 219
  
206
-Step10: retrieve data from \Robject{cad} and calculate $pd_j$ and frequency of coronary artery disease: \textit{freq.case} in population:
220
+Step10: retrieve data from \Robject{cad} and calculate $pdj$ and frequency of coronary artery disease\textit{cad}, \textit{freq.case} in case population:
207 221
 <<Step10,keep.source=TRUE, eval=FALSE>>=
208 222
 hg18.d <- cad$chr_pos_b36
209 223
 SNP.d <- cad$SNP #SNPID
... ...
@@ -387,7 +401,7 @@ Note that in the current version, \Rpackage{GMRP} can just create two-level nest
387 401
 
388 402
 <<>>=
389 403
 data(SNP358.data)
390
-SNP358 <- DataFrame(SNP358.data)
404
+SNP358 <- as.data.frame(SNP358.data)
391 405
 head(SNP358)
392 406
 @
393 407
 
... ...
@@ -396,18 +410,15 @@ head(SNP358)
396 410
 library(graphics)
397 411
 @
398 412
 
399
-With SNP data \Robject{SNP358}, we can perform \emph{SNP} position annotation using function \Rfunction{snpposit}\textit{(SNPdata,SNP\underline{}hg19,LG,main,maxd)} where 
413
+With SNP data \Robject{SNP358}, we can perform \emph{SNP} position annotation using function \Rfunction{snpPositAnnot} \textit{(SNPdata,SNP\underline{}hg19,main)} where 
400 414
 
401 415
 \textit{SNPdata} is R object that may be \emph{hg19} that is a string vector(\textit{chr}\#\#.\#\#\#\#\#\#\#\#) or two numeric vectors (chromosome number and \emph{SNP} position).
402 416
 
403
-\textit{SNP\underline{}hg19} is a string parameter. It may be "\textit{hg19}" or "\textit{chr}". If \textit{SNP\underline{}hg19}="\textit{hg19}",then \textit{SNPdata} contains a string vector of \textit{hg19} or if \textit{SNP\underline{}hg19}="\textit{chr}", then \textit{SNPdata} consists of at lest two numeric columns: \textit{chr} and \textit{posit}. \textit{chr} is chromosome number and \textit{posit} is \emph{SNP} physical position on chromosomes. Note that "\textit{chr}" and "\textit{posit}" are required column names in \textit{SNPdata} if \textit{SNP\underline{}hg19} ="\textit{chr}".
404
-
405
-\emph{LG} is a numeric parameter that gives maximum permissible distance between positions. Its default is 10.
417
+\textit{SNP\underline{}hg19} is a string parameter. It may be \textit{"hg19"} or \textit{"chr"}. If \textit{SNP\underline{}hg19}=\textit{"hg19"},then \textit{SNPdata} contains a string vector of \textit{hg19} or if \textit{SNP\underline{}hg19}=\textit{"chr"}, then \textit{SNPdata} consists of at lest two numeric columns: \textit{chr} and \textit{posit}. \textit{chr} is chromosome number and \textit{posit} is \emph{SNP} physical position on chromosomes.
418
+Note that \textit{"chr"} and \textit{"posit"} are required column names in \textit{SNPdata} if \textit{SNP\underline{}hg19} =\textit{"chr"}.
406 419
 
407 420
 \textit{main} is a string which is title of graph. If no title is given, then man="". Its default is "A".
408 421
 
409
-\textit{maxd} is a numeric parameter that is maximum distance for truncating chromosome columns. If there are not big differences among 23 chromosomes, then \textit{maxd} can be set to be larger than 2000$kbp$. Its default is  2000$kbp$.
410
-
411 422
 <<fig=FALSE,keep.source=TRUE, label=ChromHistogram>>=
412 423
 snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A")
413 424
 @
... ...
@@ -416,23 +427,23 @@ snpPositAnnot(SNPdata=SNP358,SNP_hg19="chr",main="A")
416 427
 <<label=figChromHistogram, fig=TRUE,echo=FALSE>>=
417 428
 <<ChromHistogram>>
418 429
 @ 
419
-\caption{ Chromosomal histogram of 358 selected \emph{SNP}s. Averaged lengths of \emph{SNP} intervals on chromosome mean that the \emph{SNP}s on a chromosome have their averaged lengths of intervals between them.  All averaged lengths over 2000kb on chromosomes were truncated, the \emph{SNP}s on these chromosomes have at least more than 2000$kbp$ averaged length of interval. Numbers above \textit{chr} columns are numbers of \emph{SNP} distributed on the chromosomes}
430
+\caption{ Chromosomal histogram of 358 selected \emph{SNP}s. Averaged lengths of \emph{SNP} intervals on chromosome mean that the \emph{SNP}s on a chromosome have their averaged lengths of intervals between them.  All averaged lengths over 2000kb on chromosomes were truncated, the \emph{SNP}s on these chromosomes have at least 2000$kbp$ length of interval. Numbers above \textit{chr} columns are numbers of \emph{SNP} distributed on the chromosomes}
420 431
 \label{figure4}
421 432
 \end{center}
422 433
 \end{figure}
423 434
 
424 435
 SNP function annotation analysis has two steps:
425 436
 
426
-Step 1: copy \emph{SNP ID}s selected to \textbf{Batch Query} box in\href{http://snp-nexus.org/index.html}{\emph{SNP} Annotation Tool}. After setting parameters and running by clicking \emph{run button}, SNP annotation result will be obtained after running for a while. Choose consequence sheet of \emph{UCSC} and copy the results to excel sheet,"\emph{Predicted function}" column name is changed to "\emph{function\underline{}unit}" name and save it as \textit{csv} format. 
437
+Step 1: copy \emph{SNP ID}s selected to \textbf{Batch Query} box in\href{http://snp-nexus.org/index.html}{\emph{SNP} Annotation Tool}. After setting parameters and running by clicking \emph{run button}, SNP annotation result will be obtained after running for a while. Choose consequence sheet of \emph{UCSC} and copy the results to excel sheet,\emph{"Predicted function"} column name is changed to \emph{"function\underline{}unit"} name and save it as \textit{csv} format. 
427 438
 
428 439
 Step2: input the \textit{csv} file into \emph{R Console} using \textbf{R} function \Rfunction{read.csv}. In \Rpackage{GMRP} package, we have provided data for \emph{SNP} function annotation analysis. 
429 440
 <<>>=
430 441
 data(SNP368annot.data)
431
-SNP368<-DataFrame(SNP368annot.data)
442
+SNP368<-as.data.frame(SNP368annot.data)
432 443
 SNP368[1:10, ]
433 444
 @
434 445
 
435
-We perform function \Rfunction{ucscannot} to summarize proportions of SNPs coming from gene various elements such as code region, introns, etc, and then create 3D pie with \Rfunction{pie3D} of \Rpackage{plotrix}. 
446
+We perform function \Rfunction{ucscannot} to summarize proportions of SNPs coming from gene various elements such as code region, introns, etc, and then create 3D pie using \Rfunction{pie3D} of \Rpackage{plotrix}. 
436 447
 <<>>=
437 448
 library(plotrix)
438 449
 @
... ...
@@ -447,7 +458,7 @@ $A$ is numeric parameter for title size, default=2.5.
447 458
 
448 459
 $B$ is numeric parameter for label size, default=1.5.
449 460
 
450
-$C$ is numeric parameter for \emph{labelrad} distance,default=0.1.
461
+$C$ is numeric parameter for \emph{labelrad} distance,default=1.3.
451 462
 
452 463
 \textit{method} is numeric parameter for choosing figure output methods. It has two options: method=1 has no legend but color and pie components are labeled with gene elements, method=2 has legend over pie. The default = 1.	
453 464