Thank you to the lintr package
... | ... |
@@ -32,7 +32,7 @@ |
32 | 32 |
#' @param plotResults logical value, TRUE by default. |
33 | 33 |
#' This determines what is returned. If \code{plotResults = FALSE}, a |
34 | 34 |
#' data frame is returned with the Sequence(s), Average Scaled Hydropathy, |
35 |
-#' and Average Net Charge. |
|
35 |
+#' and Average Net Charge. |
|
36 | 36 |
#' If \code{plotResults = TRUE}, a graphical output is returned (ggplot) |
37 | 37 |
#' showing the Charge Hydropathy Plot (recommended). |
38 | 38 |
#' @param ... additional arguments to be passed to |
... | ... |
@@ -138,7 +138,7 @@ chargeHydropathyPlot <- function( |
138 | 138 |
dataCollected$sequence <- do.call(rbind, sequenceList) |
139 | 139 |
dataCollected$avg_scaled_hydropathy <- do.call(rbind, hydropathyList) |
140 | 140 |
dataCollected$avg_net_charge <- do.call(rbind, chargeList) |
141 |
- |
|
141 |
+ |
|
142 | 142 |
if (!plotResults) { |
143 | 143 |
return(dataCollected) |
144 | 144 |
} |
... | ... |
@@ -3,27 +3,25 @@ |
3 | 3 |
#' This is used to calculate the prediction of intrinsic disorder based on |
4 | 4 |
#' the scaled hydropathy and absolute net charge of an amino acid |
5 | 5 |
#' sequence using a sliding window. FoldIndex described this relationship and |
6 |
-#' implemented it graphically in 2005 by Prilusky, Felder, et al, |
|
6 |
+#' implemented it graphically in 2005 by Prilusky, Felder, et al, |
|
7 | 7 |
#' and this tool has been implemented |
8 |
-#' into multiple disorder prediction programs. When windows have a negative |
|
9 |
-#' score (<0) sequences are predicted as disordered. |
|
10 |
-#' When windows have a positive score (>0) sequences are predicted as |
|
11 |
-#' disordered. Graphically, this cutoff is displayed by the dashed |
|
8 |
+#' into multiple disorder prediction programs. When windows have a negative |
|
9 |
+#' score (<0) sequences are predicted as disordered. |
|
10 |
+#' When windows have a positive score (>0) sequences are predicted as |
|
11 |
+#' ordered. Graphically, this cutoff is displayed by the dashed |
|
12 | 12 |
#' line at y = 0. Calculations are at pH 7.0 based on the described method and |
13 |
-#' the default is a sliding window of size 51. |
|
14 |
-#' |
|
13 |
+#' the default is a sliding window of size 51. |
|
14 |
+#' |
|
15 | 15 |
#' The output is either a data frame or graph |
16 | 16 |
#' showing the calculated scores for each window along the sequence. |
17 | 17 |
#' The equation used was originally described in Uversky et al. (2000)\cr |
18 | 18 |
#' \url{https://doi.org/10.1002/1097-0134(20001115)41:3<415::AID-PROT130>3.0.CO;2-7} |
19 | 19 |
#' . \cr |
20 |
-#' |
|
21 |
-#' The FoldIndex method of using a sliding window and utilizing the uversky |
|
20 |
+#' The FoldIndex method of using a sliding window and utilizing the Uversky |
|
22 | 21 |
#' equation is described in Prilusky, J., Felder, C. E., et al. (2005). \cr |
23 |
-#' FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
22 |
+#' FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
24 | 23 |
#' is intrinsically unfolded. Bioinformatics, 21(16), 3435-3438. \cr |
25 |
-#' |
|
26 |
-#' |
|
24 |
+#' |
|
27 | 25 |
#' @inheritParams sequenceCheck |
28 | 26 |
#' @inheritParams chargeCalculationLocal |
29 | 27 |
#' @param window a positive, odd integer. 51 by default. |
... | ... |
@@ -42,9 +40,7 @@ |
42 | 40 |
#' @seealso \code{\link{KDNorm}} for residue hydropathy values. |
43 | 41 |
#' See \code{\link{pKaData}} for residue pKa values and citations. See |
44 | 42 |
#' \code{\link{hendersonHasselbalch}} for charge calculations. |
45 |
-#' @references Kyte, J., & Doolittle, R. F. (1982). A simple method for |
|
46 |
-#' displaying the hydropathic character of a protein. |
|
47 |
-#' Journal of molecular biology, 157(1), 105-132. |
|
43 |
+ |
|
48 | 44 |
#' @section Plot Colors: |
49 | 45 |
#' For users who wish to keep a common aesthetic, the following colors are |
50 | 46 |
#' used when plotResults = TRUE. \cr |
... | ... |
@@ -53,15 +49,18 @@ |
53 | 49 |
#' \item Close to -1 = "#9672E6" |
54 | 50 |
#' \item Close to 1 = "#D1A63F" |
55 | 51 |
#' \item Close to midpoint = "grey65" or "#A6A6A6"}} |
56 |
-#' |
|
57 |
-#' @references |
|
52 |
+#' |
|
53 |
+#' @references |
|
54 |
+#' Kyte, J., & Doolittle, R. F. (1982). A simple method for |
|
55 |
+#' displaying the hydropathic character of a protein. |
|
56 |
+#' Journal of molecular biology, 157(1), 105-132. |
|
58 | 57 |
#' Kozlowski, L. P. (2016). IPC – Isoelectric Point Calculator. Biology |
59 | 58 |
#' Direct, 11(1), 55. \url{https://doi.org/10.1186/s13062-016-0159-9} \cr |
60 | 59 |
#' Kyte, J., & Doolittle, R. F. (1982). A simple method for |
61 | 60 |
#' displaying the hydropathic character of a protein. |
62 | 61 |
#' Journal of molecular biology, 157(1), 105-132. \cr |
63 | 62 |
#' Prilusky, J., Felder, C. E., et al. (2005). \cr |
64 |
-#' FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
63 |
+#' FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
65 | 64 |
#' is intrinsically unfolded. Bioinformatics, 21(16), 3435-3438. \cr |
66 | 65 |
#' Uversky, V. N., Gillespie, J. R., & Fink, A. L. (2000). |
67 | 66 |
#' Why are “natively unfolded” proteins unstructured under physiologic |
... | ... |
@@ -71,34 +70,30 @@ |
71 | 70 |
#' @export |
72 | 71 |
|
73 | 72 |
foldIndexR <- function(sequence, |
74 |
- window = 51, |
|
73 |
+ window = 51, |
|
75 | 74 |
proteinName = NA, |
76 | 75 |
pKaSet = "IPC_protein", |
77 | 76 |
plotResults = TRUE, |
78 | 77 |
...) { |
79 |
- |
|
80 | 78 |
chargeDF <- |
81 | 79 |
chargeCalculationLocal(sequence = sequence, window = window, |
82 |
- pH = 7.0, pKaSet = pKaSet, |
|
80 |
+ pH = 7.0, pKaSet = pKaSet, |
|
83 | 81 |
plotResults = FALSE) |
84 | 82 |
chargeDF$scaledWindowCharge <- chargeDF$windowCharge / window |
85 |
- hydropDF <- scaledHydropathyLocal(sequence = sequence, |
|
83 |
+ hydropDF <- scaledHydropathyLocal(sequence = sequence, |
|
86 | 84 |
window = window, |
87 | 85 |
plotResults = FALSE) |
88 | 86 |
mergeDF <- merge(hydropDF, chargeDF) |
89 |
- |
|
90 |
- mergeDF$foldIndex <- |
|
91 |
- mergeDF$WindowHydropathy * 2.785 - |
|
87 |
+ mergeDF$foldIndex <- |
|
88 |
+ mergeDF$WindowHydropathy * 2.785 - |
|
92 | 89 |
abs(mergeDF$scaledWindowCharge) - 1.151 |
93 |
- |
|
94 | 90 |
if (plotResults) { |
95 | 91 |
plotTitle <- "FoldIndex Prediction of Intrinsic Disorder" |
96 | 92 |
if (!is.na(proteinName)) { |
97 |
- plotTitle <- |
|
98 |
- paste0("FoldIndex Prediction of Intrinsic Disorder in ", |
|
93 |
+ plotTitle <- |
|
94 |
+ paste0("FoldIndex Prediction of Intrinsic Disorder in ", |
|
99 | 95 |
proteinName, sep = "") |
100 | 96 |
} |
101 |
- |
|
102 | 97 |
gg <- sequencePlot(position = mergeDF$Position, |
103 | 98 |
property = mergeDF$foldIndex, |
104 | 99 |
hline = 0, dynamicColor = mergeDF$foldIndex, |
... | ... |
@@ -109,5 +104,4 @@ foldIndexR <- function(sequence, |
109 | 104 |
} else { |
110 | 105 |
return(mergeDF) |
111 | 106 |
} |
112 |
- |
|
113 | 107 |
} |
... | ... |
@@ -1,15 +1,15 @@ |
1 | 1 |
#' idpr: profiling and analyzing Intrinsically Disordered Proteins in R |
2 | 2 |
#' |
3 |
-#' idpr aims to integrate tools for the computational analysis of |
|
4 |
-#' intrinsically disordered proteins (IDPs) within R. This package is used to |
|
5 |
-#' identify known characteristics of IDPs for a sequence of interest with |
|
6 |
-#' easily reported and dynamic results. Additionally, this package includes |
|
7 |
-#' tools for IDP-based sequence analysis to be used in conjunction with other R |
|
8 |
-#' packages. |
|
3 |
+#' idpr aims to integrate tools for the computational analysis of |
|
4 |
+#' intrinsically disordered proteins (IDPs) within R. This package is used to |
|
5 |
+#' identify known characteristics of IDPs for a sequence of interest with |
|
6 |
+#' easily reported and dynamic results. Additionally, this package includes |
|
7 |
+#' tools for IDP-based sequence analysis to be used in conjunction with other R |
|
8 |
+#' packages. |
|
9 | 9 |
#' \cr |
10 | 10 |
#' Please see the idpr vignettes for details on idpr functions and theory. |
11 | 11 |
#' \code{browseVignettes("idpr")} |
12 | 12 |
#' @docType package |
13 | 13 |
#' @name idpr |
14 | 14 |
NULL |
15 |
-#> NULL |
|
16 | 15 |
\ No newline at end of file |
16 |
+#> NULL |
... | ... |
@@ -86,10 +86,10 @@ sequenceCheck <- function( |
86 | 86 |
if (!all(is.character(outputType), is.character(method))) { |
87 | 87 |
stop("Error: method and outputType must be character vectors,") |
88 | 88 |
} |
89 |
- if (!any(is.character(sequence), |
|
90 |
- (is(sequence)[1] %in% c("AAString", "BString", |
|
89 |
+ if (!any(is.character(sequence), |
|
90 |
+ (is(sequence)[1] %in% c("AAString", "BString", |
|
91 | 91 |
"AAStringSet", "BStringSet")) |
92 |
- )){ |
|
92 |
+ )) { |
|
93 | 93 |
stop("Error: sequence must be a character vector or an AAString Object") |
94 | 94 |
} |
95 | 95 |
if (!(method %in% c("stop", "warn"))) { |
... | ... |
@@ -98,14 +98,14 @@ sequenceCheck <- function( |
98 | 98 |
} |
99 | 99 |
#----- |
100 | 100 |
#This section will confirm what to do with the amino acid sequence |
101 |
- if(is(sequence)[1] %in% c("AAString", "BString", |
|
102 |
- "AAStringSet", "BStringSet")){ |
|
101 |
+ if (is(sequence)[1] %in% c("AAString", "BString", |
|
102 |
+ "AAStringSet", "BStringSet")) { |
|
103 | 103 |
sequence <- as.character(sequence) |
104 | 104 |
} |
105 | 105 |
if (length(sequence) == 1) { |
106 | 106 |
#this is to see if the string is a .fasta / .fa file |
107 | 107 |
if (grepl("\\.fa", sequence, ignore.case = TRUE)) { |
108 |
- sequence <- Biostrings::readAAStringSet(sequence, format="fasta") |
|
108 |
+ sequence <- Biostrings::readAAStringSet(sequence, format = "fasta") |
|
109 | 109 |
sequence <- as.character(sequence) |
110 | 110 |
} |
111 | 111 |
separatedSequence <- strsplit(sequence, "") |
... | ... |
@@ -342,7 +342,7 @@ sequenceMap <- function( |
342 | 342 |
if (plyr::is.discrete(seqDF$Property)) { |
343 | 343 |
gg <- gg + ggplot2::scale_fill_manual(values = customColors) |
344 | 344 |
} else { |
345 |
- gg <- gg + |
|
345 |
+ gg <- gg + |
|
346 | 346 |
ggplot2::scale_fill_gradient2( |
347 | 347 |
high = customColors[1], |
348 | 348 |
low = customColors[2], |
... | ... |
@@ -44,7 +44,7 @@ The environmental pH is used to calculate residue charge.} |
44 | 44 |
\item{plotResults}{logical value, TRUE by default. |
45 | 45 |
This determines what is returned. If \code{plotResults = FALSE}, a |
46 | 46 |
data frame is returned with the Sequence(s), Average Scaled Hydropathy, |
47 |
-and Average Net Charge. |
|
47 |
+and Average Net Charge. |
|
48 | 48 |
If \code{plotResults = TRUE}, a graphical output is returned (ggplot) |
49 | 49 |
showing the Charge Hydropathy Plot (recommended).} |
50 | 50 |
|
... | ... |
@@ -51,24 +51,24 @@ see plotResults argument |
51 | 51 |
This is used to calculate the prediction of intrinsic disorder based on |
52 | 52 |
the scaled hydropathy and absolute net charge of an amino acid |
53 | 53 |
sequence using a sliding window. FoldIndex described this relationship and |
54 |
- implemented it graphically in 2005 by Prilusky, Felder, et al, |
|
54 |
+ implemented it graphically in 2005 by Prilusky, Felder, et al, |
|
55 | 55 |
and this tool has been implemented |
56 |
- into multiple disorder prediction programs. When windows have a negative |
|
57 |
- score (<0) sequences are predicted as disordered. |
|
58 |
- When windows have a positive score (>0) sequences are predicted as |
|
59 |
- disordered. Graphically, this cutoff is displayed by the dashed |
|
56 |
+ into multiple disorder prediction programs. When windows have a negative |
|
57 |
+ score (<0) sequences are predicted as disordered. |
|
58 |
+ When windows have a positive score (>0) sequences are predicted as |
|
59 |
+ ordered. Graphically, this cutoff is displayed by the dashed |
|
60 | 60 |
line at y = 0. Calculations are at pH 7.0 based on the described method and |
61 |
- the default is a sliding window of size 51. |
|
62 |
- |
|
63 |
- The output is either a data frame or graph |
|
61 |
+ the default is a sliding window of size 51. |
|
62 |
+} |
|
63 |
+\details{ |
|
64 |
+The output is either a data frame or graph |
|
64 | 65 |
showing the calculated scores for each window along the sequence. |
65 | 66 |
The equation used was originally described in Uversky et al. (2000)\cr |
66 | 67 |
\url{https://doi.org/10.1002/1097-0134(20001115)41:3<415::AID-PROT130>3.0.CO;2-7} |
67 | 68 |
. \cr |
68 |
- |
|
69 |
- The FoldIndex method of using a sliding window and utilizing the uversky |
|
69 |
+ The FoldIndex method of using a sliding window and utilizing the Uversky |
|
70 | 70 |
equation is described in Prilusky, J., Felder, C. E., et al. (2005). \cr |
71 |
- FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
71 |
+ FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
72 | 72 |
is intrinsically unfolded. Bioinformatics, 21(16), 3435-3438. \cr |
73 | 73 |
} |
74 | 74 |
\section{Plot Colors}{ |
... | ... |
@@ -80,15 +80,19 @@ This is used to calculate the prediction of intrinsic disorder based on |
80 | 80 |
\item Close to -1 = "#9672E6" |
81 | 81 |
\item Close to 1 = "#D1A63F" |
82 | 82 |
\item Close to midpoint = "grey65" or "#A6A6A6"}} |
83 |
- |
|
84 |
- @references |
|
83 |
+} |
|
84 |
+ |
|
85 |
+\references{ |
|
86 |
+Kyte, J., & Doolittle, R. F. (1982). A simple method for |
|
87 |
+ displaying the hydropathic character of a protein. |
|
88 |
+ Journal of molecular biology, 157(1), 105-132. |
|
85 | 89 |
Kozlowski, L. P. (2016). IPC – Isoelectric Point Calculator. Biology |
86 | 90 |
Direct, 11(1), 55. \url{https://doi.org/10.1186/s13062-016-0159-9} \cr |
87 | 91 |
Kyte, J., & Doolittle, R. F. (1982). A simple method for |
88 | 92 |
displaying the hydropathic character of a protein. |
89 | 93 |
Journal of molecular biology, 157(1), 105-132. \cr |
90 | 94 |
Prilusky, J., Felder, C. E., et al. (2005). \cr |
91 |
- FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
95 |
+ FoldIndex: a simple tool to predict whether a given protein sequence \cr |
|
92 | 96 |
is intrinsically unfolded. Bioinformatics, 21(16), 3435-3438. \cr |
93 | 97 |
Uversky, V. N., Gillespie, J. R., & Fink, A. L. (2000). |
94 | 98 |
Why are “natively unfolded” proteins unstructured under physiologic |
... | ... |
@@ -96,12 +100,6 @@ This is used to calculate the prediction of intrinsic disorder based on |
96 | 100 |
415-427. |
97 | 101 |
\url{https://doi.org/10.1002/1097-0134(20001115)41:3<415::AID-PROT130>3.0.CO;2-7} |
98 | 102 |
} |
99 |
- |
|
100 |
-\references{ |
|
101 |
-Kyte, J., & Doolittle, R. F. (1982). A simple method for |
|
102 |
- displaying the hydropathic character of a protein. |
|
103 |
- Journal of molecular biology, 157(1), 105-132. |
|
104 |
-} |
|
105 | 103 |
\seealso{ |
106 | 104 |
\code{\link{KDNorm}} for residue hydropathy values. |
107 | 105 |
See \code{\link{pKaData}} for residue pKa values and citations. See |
... | ... |
@@ -5,12 +5,12 @@ |
5 | 5 |
\alias{idpr} |
6 | 6 |
\title{idpr: profiling and analyzing Intrinsically Disordered Proteins in R} |
7 | 7 |
\description{ |
8 |
-idpr aims to integrate tools for the computational analysis of |
|
9 |
-intrinsically disordered proteins (IDPs) within R. This package is used to |
|
10 |
-identify known characteristics of IDPs for a sequence of interest with |
|
11 |
-easily reported and dynamic results. Additionally, this package includes |
|
12 |
-tools for IDP-based sequence analysis to be used in conjunction with other R |
|
13 |
-packages. |
|
8 |
+idpr aims to integrate tools for the computational analysis of |
|
9 |
+intrinsically disordered proteins (IDPs) within R. This package is used to |
|
10 |
+identify known characteristics of IDPs for a sequence of interest with |
|
11 |
+easily reported and dynamic results. Additionally, this package includes |
|
12 |
+tools for IDP-based sequence analysis to be used in conjunction with other R |
|
13 |
+packages. |
|
14 | 14 |
\cr |
15 | 15 |
Please see the idpr vignettes for details on idpr functions and theory. |
16 | 16 |
\code{browseVignettes("idpr")} |
... | ... |
@@ -11,7 +11,7 @@ vignette: > |
11 | 11 |
knitr::opts_chunk$set( |
12 | 12 |
collapse = TRUE, |
13 | 13 |
comment = "#>", |
14 |
- fig.width = 6, |
|
14 |
+ fig.width = 6, |
|
15 | 15 |
fig.height = 4 |
16 | 16 |
) |
17 | 17 |
``` |
... | ... |
@@ -34,7 +34,6 @@ tend to be aliphatic, hydrophobic, aromatic, or form tertiary structures |
34 | 34 |
Therefore, there is a distinct difference of biochemistry |
35 | 35 |
between IDPs and ordered proteins. |
36 | 36 |
|
37 |
- |
|
38 | 37 |
It was shown in Uversky, Gillespie, & Fink (2000) that both high net charge and |
39 | 38 |
low mean hydropathy are properties of IDPs. One explanation is that a high net |
40 | 39 |
charge leads to increased repulsion of residues causing an extended structure |
... | ... |
@@ -78,7 +77,7 @@ This was described in Prilusky, J., Felder, C. E., et al. (2005). |
78 | 77 |
The idpr package can be installed from Bioconductor with the following line of |
79 | 78 |
code. It requires the BiocManager package to be installed |
80 | 79 |
```{r} |
81 |
-#BiocManager::install("idpr") |
|
80 |
+#BiocManager::install("idpr") |
|
82 | 81 |
``` |
83 | 82 |
|
84 | 83 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -185,7 +184,7 @@ print(TP53_Sequences) |
185 | 184 |
```{r} |
186 | 185 |
gg <- chargeHydropathyPlot( |
187 | 186 |
sequence = TP53_Sequences, |
188 |
- pKaSet = "IPC_protein") |
|
187 |
+ pKaSet = "IPC_protein") |
|
189 | 188 |
plot(gg) |
190 | 189 |
``` |
191 | 190 |
|
... | ... |
@@ -206,12 +205,19 @@ chargeHydropathyPlot( |
206 | 205 |
|
207 | 206 |
## Using FoldIndexR to predict folded and unfolded windows. |
208 | 207 |
|
208 |
+Predictions are made on a scale of -1 to 1, where any residues with |
|
209 |
+a negative score are predicted disordered (green; under 0), |
|
210 |
+and any residues with a positive score are predicted ordered (purple; above 0). |
|
211 |
+ |
|
212 |
+Functionally, this uses a large sliding window (default 51), as described in |
|
213 |
+Prilusky, J., Felder, C. E., et al. (2005), for both scaled hydropathy and |
|
214 |
+local charge. |
|
209 | 215 |
```{r} |
210 | 216 |
foldIndexR(sequence = HUMAN_P53, |
211 | 217 |
plotResults = TRUE) |
212 | 218 |
``` |
213 | 219 |
|
214 |
-Prilusky, J., Felder, C. E., et al. (2005). |
|
220 |
+ |
|
215 | 221 |
|
216 | 222 |
## Calculating Scaled Hydropathy |
217 | 223 |
|
... | ... |
@@ -61,7 +61,7 @@ The following matrices are available within **idpr**: |
61 | 61 |
The idpr package can be installed from Bioconductor with the following line of |
62 | 62 |
code. It requires the BiocManager package to be installed |
63 | 63 |
```{r} |
64 |
-#BiocManager::install("idpr") |
|
64 |
+#BiocManager::install("idpr") |
|
65 | 65 |
``` |
66 | 66 |
|
67 | 67 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -347,7 +347,7 @@ BLOSUM_MSA <- msa(TP53_Sequences, |
347 | 347 |
gapOpening = 10, |
348 | 348 |
gapExtension = 0.5) |
349 | 349 |
|
350 |
-print(BLOSUM_MSA, show="complete") |
|
350 |
+print(BLOSUM_MSA, show = "complete") |
|
351 | 351 |
``` |
352 | 352 |
|
353 | 353 |
|
... | ... |
@@ -358,7 +358,7 @@ EDSS_MSA <- msa(TP53_Sequences, |
358 | 358 |
gapOpening = 19, |
359 | 359 |
gapExtension = 2) |
360 | 360 |
|
361 |
-print(EDSS_MSA, show="complete") |
|
361 |
+print(EDSS_MSA, show = "complete") |
|
362 | 362 |
``` |
363 | 363 |
|
364 | 364 |
|
... | ... |
@@ -370,10 +370,11 @@ The user guide to **msa** shows an example of converting the sequence alignment |
370 | 370 |
Therefore, the IDP-specific matrices can be used for this type of analysis. |
371 | 371 |
The conversion uses both the **ape** and **seqinr** packages. |
372 | 372 |
```{r fig1, fig.height = 4, fig.width = 6} |
373 |
-EDSS_MSA_Tree <- msa::msaConvert(EDSS_MSA, type="seqinr::alignment") |
|
373 |
+EDSS_MSA_Tree <- msa::msaConvert(EDSS_MSA, type = "seqinr::alignment") |
|
374 | 374 |
d <- seqinr::dist.alignment(EDSS_MSA_Tree, "identity") |
375 | 375 |
p53Tree <- ape::nj(d) |
376 |
-plot(p53Tree, main="Phylogenetic Tree of p53 Sequences\nAligned with EDSSMat62") |
|
376 |
+plot(p53Tree, |
|
377 |
+ main = "Phylogenetic Tree of p53 Sequences\nAligned with EDSSMat62") |
|
377 | 378 |
``` |
378 | 379 |
|
379 | 380 |
|
... | ... |
@@ -13,7 +13,7 @@ vignette: > |
13 | 13 |
knitr::opts_chunk$set( |
14 | 14 |
collapse = TRUE, |
15 | 15 |
comment = "#>", |
16 |
- fig.width = 6, |
|
16 |
+ fig.width = 6, |
|
17 | 17 |
fig.height = 4 |
18 | 18 |
) |
19 | 19 |
``` |
... | ... |
@@ -121,7 +121,7 @@ sequence-based analysis into R. |
121 | 121 |
The package can be installed from Bioconductor with the following line of code. |
122 | 122 |
This requires the BiocManager package to be installed. |
123 | 123 |
```{r} |
124 |
-#BiocManager::install("idpr") |
|
124 |
+#BiocManager::install("idpr") |
|
125 | 125 |
``` |
126 | 126 |
|
127 | 127 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -308,7 +308,7 @@ head(p53_tendency_DF) #see the first few rows of the generated data frame |
308 | 308 |
|
309 | 309 |
sequenceMap(sequence = P53_HUMAN, |
310 | 310 |
property = p53_tendency_DF$Tendency, |
311 |
- customColors = c("#F0B5B3", "#A2CD5A", "#BF3EFF")) #generate the map |
|
311 |
+ customColors = c("#F0B5B3", "#A2CD5A", "#BF3EFF")) #generate the map |
|
312 | 312 |
``` |
313 | 313 |
|
314 | 314 |
sequenceMap() does accept continuous values as well. Additionally, custom plots |
... | ... |
@@ -13,7 +13,7 @@ vignette: > |
13 | 13 |
knitr::opts_chunk$set( |
14 | 14 |
collapse = TRUE, |
15 | 15 |
comment = "#>", |
16 |
- fig.width = 6, |
|
16 |
+ fig.width = 6, |
|
17 | 17 |
fig.height = 4 |
18 | 18 |
) |
19 | 19 |
``` |
... | ... |
@@ -21,7 +21,6 @@ knitr::opts_chunk$set( |
21 | 21 |
|
22 | 22 |
# Fetching IUPred Predictions of Intrinsic Disorder |
23 | 23 |
|
24 |
- |
|
25 | 24 |
## Quick Start |
26 | 25 |
|
27 | 26 |
The functions iupred(), iupredAnchor(), and iupredRedox() are all |
... | ... |
@@ -91,7 +90,7 @@ Both type of results will be shown for examples. |
91 | 90 |
The idpr package can be installed from Bioconductor with the following line of |
92 | 91 |
code. It requires the BiocManager package to be installed. |
93 | 92 |
```{r} |
94 |
-#BiocManager::install("idpr") |
|
93 |
+#BiocManager::install("idpr") |
|
95 | 94 |
``` |
96 | 95 |
|
97 | 96 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -214,7 +213,7 @@ iupredAnchor(p53_ID, |
214 | 213 |
The data frame for iupredAnchor has a similar layout to iupred(), |
215 | 214 |
with an additional column for ANCHOR2 scores. |
216 | 215 |
```{r} |
217 |
-iupredAnchorDF <- iupredAnchor(p53_ID, |
|
216 |
+iupredAnchorDF <- iupredAnchor(p53_ID, |
|
218 | 217 |
plotResults = FALSE) |
219 | 218 |
head(iupredAnchorDF) |
220 | 219 |
``` |
... | ... |
@@ -250,7 +249,7 @@ sensitive region was predicted. When redoxSensitive == TRUE, the residue is |
250 | 249 |
predicted to be in a redox sensitive region, when FALSE the residue is not |
251 | 250 |
predicted to be in a redox sensitive region. |
252 | 251 |
```{r} |
253 |
-iupredRedoxDF <- iupredRedox(p53_ID, |
|
252 |
+iupredRedoxDF <- iupredRedox(p53_ID, |
|
254 | 253 |
plotResults = FALSE) |
255 | 254 |
head(iupredRedoxDF) |
256 | 255 |
``` |
... | ... |
@@ -272,19 +271,14 @@ iupredLongDF <- iupred(p53_ID, |
272 | 271 |
|
273 | 272 |
sequenceMap(sequence = iupredLongDF$AA, |
274 | 273 |
property = iupredLongDF$IUPred2, |
275 |
- customColors = c("darkolivegreen3", "grey65", "darkorchid1")) + |
|
274 |
+ customColors = c("darkolivegreen3", "grey65", "darkorchid1")) + |
|
276 | 275 |
ggplot2::labs(title = "Prediction of Intrinsic Disorder in HUMAN P53", |
277 | 276 |
subtitle = "By IUPred2A long") |
278 |
- |
|
279 |
- |
|
280 |
- |
|
281 | 277 |
``` |
282 | 278 |
|
283 |
- |
|
284 |
-**For further details, please refer to idpr's ** |
|
279 |
+**For further details, please refer to idpr's** |
|
285 | 280 |
**"Sequence Map Vignette" file.** |
286 | 281 |
|
287 |
- |
|
288 | 282 |
## Getting the UniProt Accession |
289 | 283 |
|
290 | 284 |
To make a connection to the IUPred2A REST API, a UniProt Accession ID is |
... | ... |
@@ -11,12 +11,11 @@ vignette: > |
11 | 11 |
knitr::opts_chunk$set( |
12 | 12 |
collapse = TRUE, |
13 | 13 |
comment = "#>", |
14 |
- fig.width = 6, |
|
14 |
+ fig.width = 6, |
|
15 | 15 |
fig.height = 4 |
16 | 16 |
) |
17 | 17 |
``` |
18 | 18 |
|
19 |
- |
|
20 | 19 |
## Introduction |
21 | 20 |
|
22 | 21 |
One way to visualize results both within **idpr** and with data from other |
... | ... |
@@ -44,7 +43,7 @@ and stored within the **idpr** package for examples. |
44 | 43 |
The package can be installed from Bioconductor with the following line of code. |
45 | 44 |
It requires the BiocManager package to be installed. |
46 | 45 |
```{r} |
47 |
-#BiocManager::install("idpr") |
|
46 |
+#BiocManager::install("idpr") |
|
48 | 47 |
``` |
49 | 48 |
|
50 | 49 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -70,10 +69,9 @@ The values can be discrete, like the output of structuralTendency(), or |
70 | 69 |
continuous, like the output of chargeCalculationGlobal() |
71 | 70 |
|
72 | 71 |
```{r} |
73 |
-tendencyDF <- structuralTendency(sequence = P53_HUMAN) |
|
72 |
+tendencyDF <- structuralTendency(sequence = P53_HUMAN) |
|
74 | 73 |
head(tendencyDF) |
75 | 74 |
|
76 |
- |
|
77 | 75 |
chargeDF <- chargeCalculationGlobal(sequence = P53_HUMAN, |
78 | 76 |
includeTermini = FALSE) |
79 | 77 |
head(chargeDF) |
... | ... |
@@ -89,13 +87,12 @@ values in 'Charge'. |
89 | 87 |
```{r} |
90 | 88 |
sequenceMap( |
91 | 89 |
sequence = tendencyDF$AA, |
92 |
- property = tendencyDF$Tendency) |
|
93 |
- |
|
90 |
+ property = tendencyDF$Tendency) |
|
94 | 91 |
|
95 | 92 |
sequenceMap( |
96 | 93 |
sequence = as.character(chargeDF$AA), |
97 | 94 |
property = chargeDF$Charge, #character vector |
98 |
- customColors = c("blue", "red", "grey30")) |
|
95 |
+ customColors = c("blue", "red", "grey30")) |
|
99 | 96 |
|
100 | 97 |
``` |
101 | 98 |
|
... | ... |
@@ -194,25 +191,24 @@ sequenceMap( |
194 | 191 |
rotationAngle = 90) #45 residues each row |
195 | 192 |
``` |
196 | 193 |
|
197 |
- |
|
198 | 194 |
You can also specify colors for discrete values using a vector of colors. This |
199 | 195 |
is done with the "customColors" argument. |
200 | 196 |
```{r} |
201 | 197 |
sequenceMap( |
202 | 198 |
sequence = tendencyDF$AA, |
203 | 199 |
property = tendencyDF$Tendency, |
204 |
- customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
200 |
+ customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
205 | 201 |
``` |
206 | 202 |
|
207 | 203 |
Continuous variables custom colors are specified with a vector in the order of |
208 | 204 |
"High value", "Low Value", "Middle Value". Here the order is high = purple, |
209 |
-low = pink, and middle = light grey |
|
205 |
+low = pink, and middle = light grey. |
|
210 | 206 |
```{r} |
211 | 207 |
sequenceMap( |
212 | 208 |
sequence = as.character(chargeDF$AA), |
213 |
- property = chargeDF$Charge, |
|
209 |
+ property = chargeDF$Charge, |
|
214 | 210 |
customColors = c("purple", "pink", "grey90") |
215 |
- ) |
|
211 |
+ ) |
|
216 | 212 |
``` |
217 | 213 |
|
218 | 214 |
Since the output is a ggplot, the visualization is able to be assigned to an |
... | ... |
@@ -237,14 +233,14 @@ ggSequence <- ggSequence + |
237 | 233 |
y = 8.05, |
238 | 234 |
yend = 8.05, |
239 | 235 |
color = "#FF3562", |
240 |
- size = 1.5) + |
|
236 |
+ size = 1.5) + |
|
241 | 237 |
annotate("segment", |
242 | 238 |
x = 1, |
243 | 239 |
xend = 12.5, |
244 | 240 |
y = 3.05, |
245 | 241 |
yend = 3.05, |
246 | 242 |
color = "#FF3562", |
247 |
- size = 1.5) + |
|
243 |
+ size = 1.5) + |
|
248 | 244 |
annotate("segment", |
249 | 245 |
x = 1, |
250 | 246 |
xend = 40.5, |
... | ... |
@@ -260,13 +256,11 @@ ggSequence <- ggSequence + |
260 | 256 |
color = "#FF3562", |
261 | 257 |
size = 1.5) + |
262 | 258 |
annotate("text", |
263 |
- x = 36.35, |
|
259 |
+ x = 36.35, |
|
264 | 260 |
y = 0.65, |
265 | 261 |
label = "= DNA Binding", |
266 | 262 |
size = 3.5, |
267 | 263 |
hjust = 0) |
268 |
- |
|
269 |
- |
|
270 | 264 |
# Adding a plot title |
271 | 265 |
ggSequence <- ggSequence + |
272 | 266 |
labs(title = "P53 Structural Tendency") + |
... | ... |
@@ -280,7 +274,7 @@ ggSequence <- ggSequence + |
280 | 274 |
show.legend = FALSE, |
281 | 275 |
inherit.aes = FALSE) + |
282 | 276 |
annotate("text", |
283 |
- x = 4.5, |
|
277 |
+ x = 4.5, |
|
284 | 278 |
y = 4.3, |
285 | 279 |
label = "Metal Binding", |
286 | 280 |
size = 3) |
... | ... |
@@ -13,7 +13,7 @@ vignette: > |
13 | 13 |
knitr::opts_chunk$set( |
14 | 14 |
collapse = TRUE, |
15 | 15 |
comment = "#>", |
16 |
- fig.width = 6, |
|
16 |
+ fig.width = 6, |
|
17 | 17 |
fig.height = 4 |
18 | 18 |
) |
19 | 19 |
``` |
... | ... |
@@ -42,7 +42,7 @@ disorder‐neutral residues are D, T, and R (Uversky, 2013). |
42 | 42 |
The package can be installed from Bioconductor with the following line of code. |
43 | 43 |
It requires the BiocManager package to be installed |
44 | 44 |
```{r} |
45 |
-#BiocManager::install("idpr") |
|
45 |
+#BiocManager::install("idpr") |
|
46 | 46 |
``` |
47 | 47 |
|
48 | 48 |
The most recent version of the package can be installed with the following line |
... | ... |
@@ -127,7 +127,7 @@ Another possibility is the use of the sequenceMap() function within **idpr**. |
127 | 127 |
sequenceMap( |
128 | 128 |
sequence = tendencyDF$AA, |
129 | 129 |
property = tendencyDF$Tendency, |
130 |
- customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
130 |
+ customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
131 | 131 |
``` |
132 | 132 |
|
133 | 133 |
structuralTendency defines order- and disorder-promoting residues based on |
... | ... |
@@ -154,12 +154,11 @@ tendencyDF <- structuralTendency(P53_MOUSE, |
154 | 154 |
disorderNeutral = c("H", "M", "T", "D"), |
155 | 155 |
orderPromoting = c("W", "C", "F", "I", "Y", "V", "L", "N")) |
156 | 156 |
head(tendencyDF) |
157 |
- |
|
158 |
- |
|
157 |
+ |
|
159 | 158 |
sequenceMap( |
160 | 159 |
sequence = P53_MOUSE, |
161 | 160 |
property = tendencyDF$Tendency, |
162 |
- customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
161 |
+ customColors = c("#999999", "#E69F00", "#56B4E9")) |
|
163 | 162 |
``` |
164 | 163 |
|
165 | 164 |
|