... | ... |
@@ -2,7 +2,7 @@ Package: MSnID |
2 | 2 |
Type: Package |
3 | 3 |
Title: Utilities for Exploration and Assessment of Confidence of |
4 | 4 |
LC-MSn Proteomics Identifications |
5 |
-Version: 1.7.3 |
|
5 |
+Version: 1.15.2 |
|
6 | 6 |
Author: Vlad Petyuk with contributions from Laurent Gatto |
7 | 7 |
Maintainer: Vlad Petyuk <petyuk@gmail.com> |
8 | 8 |
Description: Extracts MS/MS ID data from mzIdentML (leveraging mzID package) or |
... | ... |
@@ -19,4 +19,4 @@ Imports: MSnbase (>= 1.12.1), mzID (>= 1.3.5), R.cache, |
19 | 19 |
data.table, Biobase, ProtGenerics, reshape2, dplyr, mzR |
20 | 20 |
Suggests: BiocStyle, msmsTests, ggplot2, RUnit, BiocGenerics |
21 | 21 |
LazyData: yes |
22 |
-biocViews: Proteomics, MassSpectrometry |
|
22 |
+biocViews: Proteomics, MassSpectrometry, ImmunoOncology |
... | ... |
@@ -13,7 +13,8 @@ importFrom("ProtGenerics", "proteins") |
13 | 13 |
importFrom("mzID", "mzID") |
14 | 14 |
importFrom("mzR","openIDfile","score","modifications","fileName") |
15 | 15 |
importMethodsFrom("mzID", "flatten") |
16 |
-importFrom("parallel", "mclapply", "detectCores","makeCluster","stopCluster") |
|
16 |
+importFrom("parallel", "mclapply", "detectCores","makeCluster", |
|
17 |
+ "stopCluster","parLapply","clusterExport","clusterEvalQ") |
|
17 | 18 |
importClassesFrom("MSnbase", "MSnSet") |
18 | 19 |
importClassesFrom("Biobase", "AnnotatedDataFrame") |
19 | 20 |
#----------------------------------------------------------------------------- |
... | ... |
@@ -1,3 +1,21 @@ |
1 |
+Version 1.15.2 |
|
2 |
+------------- |
|
3 |
+ o Broadened the scope of the tool to ImmunoOncology - that is finding rare |
|
4 |
+ events that reflected in protein sequence. |
|
5 |
+ |
|
6 |
+Version 1.15.1 |
|
7 |
+------------- |
|
8 |
+ o mzR now added scan number(s) into the table representation of |
|
9 |
+ mzIdentML object. As a results it caused an error in my unit test |
|
10 |
+ checking if the file reads properly. Fixed this check with updated hash. |
|
11 |
+ |
|
12 |
+Version 1.11.1 |
|
13 |
+------------- |
|
14 |
+ o Switch from mclapply to parLapply in filter optimizaition. |
|
15 |
+ This fixes a bug |
|
16 |
+ "Assertion failure at kmp_runtime.cpp(6480): __kmp_thread_pool == __null." |
|
17 |
+ Also allows to run parallel on Window OS |
|
18 |
+ |
|
1 | 19 |
Version 1.7.3 |
2 | 20 |
------------- |
3 | 21 |
o Logical error fix in PSM FDR calculation. Prior it ignored redundancy |
... | ... |
@@ -155,13 +155,26 @@ setMethod( |
155 | 155 |
|
156 | 156 |
|
157 | 157 |
#----Filter--------------------------------------------------------------------- |
158 |
+# # Old implementation. Just as a backup. |
|
159 |
+# # See https://github.com/vladpetyuk/MSnID/issues/5 for the issue. |
|
160 |
+# setMethod( |
|
161 |
+# "apply_filter", |
|
162 |
+# signature(msnidObj="MSnID", filterObj="character"), |
|
163 |
+# definition=function(msnidObj, filterObj) |
|
164 |
+# { |
|
165 |
+# exprssn <- parse(text=filterObj, srcfile=NULL, keep.source=FALSE) |
|
166 |
+# msnidObj@psms <- msnidObj@psms[eval(exprssn),] |
|
167 |
+# return(msnidObj) |
|
168 |
+# } |
|
169 |
+# ) |
|
158 | 170 |
setMethod( |
159 | 171 |
"apply_filter", |
160 | 172 |
signature(msnidObj="MSnID", filterObj="character"), |
161 | 173 |
definition=function(msnidObj, filterObj) |
162 | 174 |
{ |
163 | 175 |
exprssn <- parse(text=filterObj, srcfile=NULL, keep.source=FALSE) |
164 |
- msnidObj@psms <- msnidObj@psms[eval(exprssn),] |
|
176 |
+ idx <- eval(exprssn, envir = msnidObj@psms, enclos = parent.frame()) |
|
177 |
+ msnidObj@psms <- msnidObj@psms[idx,] |
|
165 | 178 |
return(msnidObj) |
166 | 179 |
} |
167 | 180 |
) |
... | ... |
@@ -567,15 +580,29 @@ setAs("MSnID", "MSnSet", |
567 | 580 |
setMethod("infer_parsimonious_accessions", "MSnID", |
568 | 581 |
definition=function(object) |
569 | 582 |
{ |
583 |
+ # # Old code for inferring accessions. |
|
584 |
+ # # It is too slow. |
|
585 |
+ # infer_acc <- function(x){ |
|
586 |
+ # res <- list() |
|
587 |
+ # while(nrow(x) > 0){ |
|
588 |
+ # top_prot <- names(which.max(table(x[['accession']]))) |
|
589 |
+ # top_peps <- subset(x, accession == top_prot) |
|
590 |
+ # res <- c(res, list(top_peps)) |
|
591 |
+ # x <- subset(x, !(pepSeq %in% top_peps[,"pepSeq"])) |
|
592 |
+ # } |
|
593 |
+ # return(Reduce(rbind,res)) |
|
594 |
+ # } |
|
595 |
+ |
|
570 | 596 |
infer_acc <- function(x){ |
571 | 597 |
res <- list() |
598 |
+ setDT(x) |
|
572 | 599 |
while(nrow(x) > 0){ |
573 |
- top_prot <- names(which.max(table(x[['accession']]))) |
|
600 |
+ top_prot <- x[, .N, by=accession][which.max(N),,]$accession |
|
574 | 601 |
top_peps <- subset(x, accession == top_prot) |
575 | 602 |
res <- c(res, list(top_peps)) |
576 |
- x <- subset(x, !(pepSeq %in% top_peps[,"pepSeq"])) |
|
603 |
+ x <- subset(x, !(pepSeq %in% top_peps[[1]])) |
|
577 | 604 |
} |
578 |
- return(Reduce(rbind,res)) |
|
605 |
+ return(rbindlist(res, use.names=F, fill=FALSE, idcol=NULL)) |
|
579 | 606 |
} |
580 | 607 |
|
581 | 608 |
x <- unique(psms(object)[,c("pepSeq","accession")]) |
... | ... |
@@ -88,6 +88,34 @@ |
88 | 88 |
} |
89 | 89 |
|
90 | 90 |
|
91 |
+.optimize_filter.grid.parLapply <- function(filterObj, msnidObj, |
|
92 |
+ fdr.max, level, n.iter, mc.cores) |
|
93 |
+{ |
|
94 |
+ par.grid <- .construct_optimization_grid(filterObj, msnidObj, n.iter) |
|
95 |
+ cl <- makeCluster(mc.cores) |
|
96 |
+ # clusterExport(cl, |
|
97 |
+ # list(".get_num_pep_for_fdr"), |
|
98 |
+ # envir = "package:MSnID") |
|
99 |
+ clusterEvalQ(cl, library("MSnID")) |
|
100 |
+ evaluations <- parLapply(cl, |
|
101 |
+ seq_len(nrow(par.grid)), |
|
102 |
+ function(i){ |
|
103 |
+ .get_num_pep_for_fdr(par.grid[i,], |
|
104 |
+ msnidObj, |
|
105 |
+ filterObj, |
|
106 |
+ fdr.max, |
|
107 |
+ level)}) |
|
108 |
+ stopCluster(cl) |
|
109 |
+ evaluations <- round(unlist(evaluations)) |
|
110 |
+ if(all(evaluations == 0)){ |
|
111 |
+ warning(.msg.invalid.optimization.results.grid) |
|
112 |
+ return(filterObj) |
|
113 |
+ } |
|
114 |
+ optim.pars <- par.grid[which.max(evaluations),] |
|
115 |
+ newFilter <- update(filterObj, as.numeric(optim.pars)) |
|
116 |
+ return(newFilter) |
|
117 |
+} |
|
118 |
+ |
|
91 | 119 |
setMethod("optimize_filter", |
92 | 120 |
signature(filterObj="MSnIDFilter", msnidObj="MSnID"), |
93 | 121 |
definition=function(filterObj, msnidObj, fdr.max, |
... | ... |
@@ -120,19 +148,24 @@ setMethod("optimize_filter", |
120 | 148 |
} |
121 | 149 |
# |
122 | 150 |
if(method == "Grid"){ |
123 |
- if(.Platform$OS.type == "unix"){ |
|
124 |
- if(is.null(mc.cores)) |
|
125 |
- mc.cores <- getOption("mc.cores", 2L) |
|
126 |
- optimFilter <- |
|
127 |
- .optimize_filter.grid.mclapply(filterObj, msnidObj, |
|
128 |
- fdr.max, level, |
|
129 |
- n.iter, mc.cores) |
|
130 |
- }else{ |
|
131 |
- # yet to implement effective parallelization on Windows |
|
132 |
- optimFilter <- |
|
133 |
- .optimize_filter.grid(filterObj, msnidObj, |
|
134 |
- fdr.max, level, n.iter) |
|
135 |
- } |
|
151 |
+ # if(.Platform$OS.type == "unix"){ |
|
152 |
+ # if(is.null(mc.cores)) |
|
153 |
+ # mc.cores <- getOption("mc.cores", 2L) |
|
154 |
+ # optimFilter <- |
|
155 |
+ # .optimize_filter.grid.mclapply(filterObj, msnidObj, |
|
156 |
+ # fdr.max, level, |
|
157 |
+ # n.iter, mc.cores) |
|
158 |
+ # }else{ |
|
159 |
+ # # yet to implement effective parallelization on Windows |
|
160 |
+ # optimFilter <- |
|
161 |
+ # .optimize_filter.grid(filterObj, msnidObj, |
|
162 |
+ # fdr.max, level, n.iter) |
|
163 |
+ # } |
|
164 |
+ mc.cores <- getOption("mc.cores", 2L) |
|
165 |
+ optimFilter <- |
|
166 |
+ .optimize_filter.grid.parLapply(filterObj, msnidObj, |
|
167 |
+ fdr.max, level, |
|
168 |
+ n.iter, mc.cores) |
|
136 | 169 |
} |
137 | 170 |
if(method %in% c("Nelder-Mead", "SANN")){ |
138 | 171 |
optim.out <- optim(par=as.numeric(filterObj), |
... | ... |
@@ -1,3 +1,5 @@ |
1 | 1 |
MSnID |
2 | 2 |
==== |
3 |
+[](http://www.rdocumentation.org/packages/MSnID) |
|
4 |
+=== |
|
3 | 5 |
`MSnID` is an R/Bioconductor package that provides convience tools for handling and filtering of MS/MS identification results. The official page is the Bioconductor landing page ([release](http://www.bioconductor.org/packages/release/bioc/html/MSnID.html) or [devel](http://www.bioconductor.org/packages/devel/bioc/html/MSnID.html) versions). The [github page](https://github.com/vladpetyuk/MSnID) page is for sharing, testing, issue tracking and forking/pulling purposes. |
... | ... |
@@ -30,5 +30,6 @@ test_column_names <- function() { |
30 | 30 |
|
31 | 31 |
test_data_load_mzR <- function() { |
32 | 32 |
# now, check if it is what it supposed to be |
33 |
- checkIdentical(digest(psms(msnid3)),'e5c572c07878673f1165822969f81869') |
|
33 |
+ # checkIdentical(digest(psms(msnid3)),'e5c572c07878673f1165822969f81869') |
|
34 |
+ checkIdentical(digest(psms(msnid3)),'0b4e3b61e3fe007ed11651632fa3f1fb') |
|
34 | 35 |
} |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
library("MSnID") |
2 | 2 |
data(c_elegans) |
3 | 3 |
|
4 |
-test_infer_parsimonious_accessions <- function(){ |
|
5 | 4 |
|
6 |
- # explicitely adding parameters that will be used for data filtering |
|
5 |
+test_infer_parsimonious_accessions_old <- function(){ |
|
6 |
+ # explicitely adding parameters that will be used for data filtering |
|
7 | 7 |
msnidObj$msmsScore <- -log10(msnidObj$`MS-GF:SpecEValue`) |
8 | 8 |
msnidObj$absParentMassErrorPPM <- abs(mass_measurement_error(msnidObj)) |
9 | 9 |
|
... | ... |
@@ -16,3 +16,33 @@ test_infer_parsimonious_accessions <- function(){ |
16 | 16 |
} |
17 | 17 |
|
18 | 18 |
|
19 |
+ |
|
20 |
+# Above is the old function for testing protein inference. I'll leave it for |
|
21 |
+# now. Below is the new way, where first all the inference will be done |
|
22 |
+# outside of the test functions. |
|
23 |
+ |
|
24 |
+ |
|
25 |
+# explicitely adding parameters that will be used for data filtering |
|
26 |
+msnidObj$msmsScore <- -log10(msnidObj$`MS-GF:SpecEValue`) |
|
27 |
+msnidObj$absParentMassErrorPPM <- abs(mass_measurement_error(msnidObj)) |
|
28 |
+# quick-and-dirty filter. The filter is too strong for the sake of saving time |
|
29 |
+# at the minimal set of proteins inference step. |
|
30 |
+msnidObj <- apply_filter(msnidObj, 'msmsScore > 12 & absParentMassErrorPPM < 2') |
|
31 |
+msnidObj2 <- infer_parsimonious_accessions(msnidObj) |
|
32 |
+ |
|
33 |
+test_infer_parsimonious_accessions_number <- function(){ |
|
34 |
+ checkEqualsNumeric(length(proteins(msnidObj2)), 551) |
|
35 |
+} |
|
36 |
+ |
|
37 |
+# test_infer_parsimonious_accessions_hash <- function(){ |
|
38 |
+# checkIdentical(digest(psms(msnidObj2)),'42c967304603b17ef667fae5b8d5657f') |
|
39 |
+# } |
|
40 |
+ |
|
41 |
+test_infer_parsimonious_accessions_hash <- function(){ |
|
42 |
+ checkIdentical(digest(psms(msnidObj2)$accession), |
|
43 |
+ '6a3566a95b2e49a0f966d22ed897a752') |
|
44 |
+} |
|
45 |
+ |
|
46 |
+# Future challenges is to come up with tests that check inference that is |
|
47 |
+# done outside of MSnID object |
|
48 |
+ |