Browse code

Updating pwiz to 3_0_21263

Steffen Neumann authored on 23/09/2021 12:34:25
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,218 @@
1
+//
2
+// $Id$
3
+//
4
+//
5
+// Original author: Jarrett Egertson <jegertso .a.t uw.edu>
6
+//
7
+// Licensed under the Apache License, Version 2.0 (the "License"); 
8
+// you may not use this file except in compliance with the License. 
9
+// You may obtain a copy of the License at 
10
+//
11
+// http://www.apache.org/licenses/LICENSE-2.0
12
+//
13
+// Unless required by applicable law or agreed to in writing, software 
14
+// distributed under the License is distributed on an "AS IS" BASIS, 
15
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16
+// See the License for the specific language governing permissions and 
17
+// limitations under the License.
18
+//
19
+
20
+#include "pwiz/data/msdata/MSDataFile.hpp"
21
+#include "pwiz/data/msdata/Serializer_mzML.hpp"
22
+#include "pwiz/data/msdata/Diff.hpp"
23
+#include "pwiz/utility/misc/unit.hpp"
24
+#include "pwiz/utility/misc/Std.hpp"
25
+#include "pwiz_tools/common/FullReaderList.hpp"
26
+#include <boost/program_options/options_description.hpp>
27
+#include <boost/program_options/parsers.hpp>
28
+#include <boost/program_options/variables_map.hpp>
29
+#include <pwiz/analysis/demux/EnumConstantNotPresentException.hpp>
30
+#include <pwiz/analysis/spectrum_processing/SpectrumList_Demux.hpp>
31
+#include <pwiz/analysis/spectrum_processing/SpectrumList_PeakPicker.hpp>
32
+#include <pwiz/analysis/spectrum_processing/SpectrumListFactory.hpp>
33
+#include <boost/chrono/chrono.hpp>
34
+#include <src/Core/products/Parallelizer.h>
35
+
36
+#define PRISM_VERSION "0.3"
37
+
38
+using namespace pwiz::data;
39
+using namespace pwiz::msdata;
40
+using namespace pwiz::util;
41
+using namespace pwiz::analysis;
42
+
43
+namespace po = boost::program_options;
44
+
45
+struct Optimization
46
+{
47
+    Optimization(const string& s) : value(s) {}
48
+    string value;
49
+};
50
+
51
+// This overload is needed for using default_value when building a variable_map. The default_value function relies on boost::lexical_cast, which relies on this operator.
52
+ostream& operator <<(ostream& stream, const Optimization& opt) { return stream << opt.value; }
53
+
54
+/// Overload the 'validate' function for the user-defined Optimization option class.
55
+void validate(boost::any& v,
56
+    const vector<string>& values,
57
+    Optimization*,
58
+    int)
59
+{
60
+    // Make sure no previous assignment to 'a' was made.
61
+    po::validators::check_first_occurrence(v);
62
+    // Extract the first string from 'values'. If there is more than
63
+    // one string, it's an error, and exception will be thrown.
64
+    const string& s = po::validators::get_single_string(values);
65
+
66
+    // Try conversion to desired enum type
67
+    try
68
+    {
69
+        SpectrumList_Demux::Params::stringToOptimization(s);
70
+        v = boost::any(Optimization(s));
71
+    }
72
+    catch (EnumConstantNotPresentException&)
73
+    {
74
+        throw po::invalid_option_value(s);
75
+    }
76
+}
77
+
78
+class IterationListenerStream : public IterationListener
79
+{
80
+public:
81
+    explicit IterationListenerStream(ostream &os = std::cerr) : os_(os), entfmt_("%5.1f"){}
82
+
83
+    Status update(const UpdateMessage& updateMessage) override
84
+    {
85
+        auto percentProgress = 100.0 * float(updateMessage.iterationIndex) / float(updateMessage.iterationCount);
86
+        os_ << "\rDemultiplexing: " << entfmt_ % percentProgress << "% (Spectrum "
87
+            << updateMessage.iterationIndex << " of " << updateMessage.iterationCount << ")       ";
88
+        return Status_Ok;
89
+    }
90
+private:
91
+    ostream &os_;
92
+    boost::format entfmt_;
93
+};
94
+
95
+po::variables_map parseCommandLine(int argc, char **argv);
96
+
97
+void run(int argc, char ** argv)
98
+{
99
+    // The Eigen library must be initialized to support multithreaded operations before creating the threads
100
+    Eigen::initParallel();
101
+    auto start = boost::chrono::system_clock::now();
102
+    std::cerr << "Welcome to Prism v" << PRISM_VERSION << endl;
103
+    auto vm = parseCommandLine(argc, argv);
104
+    
105
+    SpectrumList_Demux::Params demuxParams;
106
+    demuxParams.massError = vm["massError"].as<pwiz::chemistry::MZTolerance>();
107
+    demuxParams.nnlsMaxIter = vm["nnlsMaxIter"].as<unsigned int>();
108
+    demuxParams.nnlsEps = vm["nnlsEps"].as<double>();
109
+    demuxParams.applyWeighting = !vm["noWeighting"].as<bool>();
110
+    demuxParams.demuxBlockExtra = vm["demuxBlockExtra"].as<double>();
111
+    demuxParams.variableFill = vm["variableFill"].as<bool>();
112
+    demuxParams.regularizeSums = !vm["noSumNormalize"].as<bool>();
113
+    demuxParams.optimization = SpectrumList_Demux::Params::stringToOptimization(vm["optimization"].as<Optimization>().value);
114
+    demuxParams.padScanTimes = vm["padScanTimes"].as<bool>();
115
+    demuxParams.interpolateRetentionTime = vm["interpolateRT"].as<bool>();
116
+    bool skipCentroiding = vm["skipCentroiding"].as<bool>();
117
+
118
+    FullReaderList readers;
119
+    MSDataFile msd(vm["inputfile"].as<string>(), &readers);
120
+    IntegerSet levelsToCentroid(1,2);
121
+
122
+    // Bypass centroiding
123
+    SpectrumListPtr toDemuxPtr = msd.run.spectrumListPtr;
124
+    if (!skipCentroiding)
125
+    {
126
+        SpectrumListPtr tempPtr = toDemuxPtr;
127
+        toDemuxPtr.reset(new SpectrumList_PeakPicker(tempPtr,
128
+                PeakDetectorPtr(boost::make_shared<LocalMaximumPeakDetector>(3)),
129
+                true,
130
+                levelsToCentroid));
131
+        msd.filterApplied();
132
+    }
133
+
134
+    SpectrumListPtr demux_list(new SpectrumList_Demux(toDemuxPtr, demuxParams));
135
+    msd.filterApplied();
136
+    msd.run.spectrumListPtr = demux_list;
137
+    // set up progress indicator
138
+    IterationListenerPtr listenerPtr(new IterationListenerStream(std::cerr));
139
+    IterationListenerRegistry listenerRegistry;
140
+    listenerRegistry.addListener(listenerPtr, 10);
141
+
142
+    MSDataFile::WriteConfig writeConfig;
143
+    MSDataFile::write(msd, vm["outputfile"].as<string>(), writeConfig, &listenerRegistry);
144
+    std::cerr << endl;
145
+    boost::chrono::duration<double> sec = boost::chrono::system_clock::now() - start;
146
+    std::cout << "Demultiplexing took " << sec.count() / 60.0 << " minutes." << std::endl;
147
+}
148
+
149
+po::variables_map parseCommandLine(int argc, char **argv)
150
+{
151
+    po::options_description desc("PRISM arguments");
152
+    // Get default values
153
+    SpectrumList_Demux::Params demuxParams;
154
+    Optimization defaultOptimization(SpectrumList_Demux::Params::optimizationToString(demuxParams.optimization));
155
+    bool skipCentroiding = false;
156
+    
157
+    desc.add_options()
158
+        ("help,h", "Produce this help message.")
159
+        ("massError", po::value<pwiz::chemistry::MZTolerance>()->default_value(demuxParams.massError),
160
+        "MS/MS error between spectra. Can specify number and units of either ppm or da, e.g. 10ppm or 0.4da. Default 10ppm.")
161
+        ("variableFill", po::bool_switch()->default_value(demuxParams.variableFill),
162
+        "MSX data was acquired with variable fill times.")
163
+        ("demuxBlockExtra", po::value<double>()->default_value(demuxParams.demuxBlockExtra),
164
+        "Extra slop to include in the set of spectra used for demux. Number of spectra added is input * num spectra in one cycle")
165
+        ("nnlsMaxIter", po::value<unsigned int>()->default_value(demuxParams.nnlsMaxIter),
166
+        "Maximum number of iterations for demux solve.")
167
+        ("nnlsEps", po::value<double>()->default_value(demuxParams.nnlsEps),
168
+        "Epsilon value for testing for convergence of demux solve.")
169
+        ("noSumNormalize", po::bool_switch()->default_value(!demuxParams.regularizeSums),
170
+        "Do not normalize the raw demux output for a spectrum so it sums up to the multiplexed peak intensity")
171
+        ("noWeighting", po::bool_switch()->default_value(!demuxParams.applyWeighting),
172
+        "Use non-weighted non negative least squares")
173
+        ("optimization", po::value<Optimization>()->default_value(defaultOptimization),
174
+        "Use optimizations. Available optimizations are \"none\", and \"overlap_only\"" )
175
+        ("padScanTimes", po::bool_switch()->default_value(demuxParams.padScanTimes),
176
+        "Pad scan times with small increment to prevent simultaneous spectra (which some software cannot handle)")
177
+        ("interpolateRT", po::bool_switch()->default_value(demuxParams.interpolateRetentionTime),
178
+        "Interpolate retention times. This only applies when overlap demultiplexing with no MSX (overlap only)")
179
+        ("skipCentroiding", po::bool_switch()->default_value(skipCentroiding),
180
+        "Set to true to use profile data if available")
181
+        ("inputfile", po::value<string>()->required(),
182
+        "Input spectra file")
183
+        ("outputfile", po::value<string>()->required(),
184
+        "Output spectra file");
185
+    
186
+    po::positional_options_description p;
187
+    p.add("inputfile", 1);
188
+    p.add("outputfile", 1);
189
+    po::variables_map vm;
190
+    po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
191
+    if (vm.count("help"))
192
+    {
193
+        std::cout << desc << endl;
194
+    }
195
+    notify(vm);
196
+    return vm;
197
+}
198
+
199
+
200
+int main(int argc, char **argv)
201
+{
202
+    try
203
+    {
204
+        run(argc, argv);
205
+
206
+        return 0;
207
+    }
208
+    catch (std::exception& e)
209
+    {
210
+        std::cerr << e.what() << endl;
211
+    }
212
+    catch (...)
213
+    {
214
+        std::cerr << "Caught unknown exception.\n";
215
+    }
216
+
217
+    return 1;
218
+}
0 219
\ No newline at end of file