libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzxmloutput.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/output/mzxmloutput.cpp
3 * \date 23/11/2019
4 * \author Olivier Langella
5 * \brief write msrun peaks into mzxml output stream
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include "mzxmloutput.h"
32#include <QDebug>
33#include <QStringList>
34#include <algorithm>
35#include <cstdio>
36#include "pappsomspp/config.h"
37
38using namespace pappso;
39
40
/**
 * \brief return a copy of \p in whose bytes are in reverse order
 *
 * Used to convert a value between little-endian and big-endian layouts.
 * The argument is taken by value, so the caller's object is left untouched.
 *
 * \tparam T type of the value; its bytes are reversed as raw storage
 * \param in value to convert
 * \return \p in with the order of its sizeof(T) bytes reversed
 */
template <class T>
T
change_endian(T in)
{
  // Accessing the object representation through char * is allowed by the
  // aliasing rules.
  char *const first_byte = reinterpret_cast<char *>(&in);
  std::reverse(first_byte, first_byte + sizeof(T));
  return in;
}
50
51
53{
54 mp_output = p_mzxml_output;
55}
59void
61{
62 qDebug();
63 mp_output->m_monitor.count();
64 mp_output->writeQualifiedMassSpectrum(spectrum);
65 qDebug();
66}
67bool
69{
70 return true;
71}
72
73
74MzxmlOutput::MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device)
75 : m_monitor(monitor)
76{
77
78 mpa_outputStream = new QXmlStreamWriter(p_output_device);
79 mpa_outputStream->setAutoFormatting(true);
80
81 mpa_outputStream->writeStartDocument("1.0");
82}
83
89
90void
92{
93 m_isReadAhead = isReadAhead;
94}
95void
97{
98 qDebug();
99 m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
100 writeHeader(p_msrunreader);
101
102 Translater translater(this);
103
104 translater.setReadAhead(m_isReadAhead);
105
107 // translater.setNeedMsLevelPeakList(1, false);
108 // translater.setNeedMsLevelPeakList(2, false);
109
110
111 MsRunReadConfig config;
112 std::vector<size_t> ms_levels;
113 for(std::size_t i = 1; i < 9; i++)
114 {
115 ms_levels.push_back(i);
116 }
117 config.setMsLevels(ms_levels);
118 config.setNeedPeakList(true);
119
120 p_msrunreader->readSpectrumCollection2(config, translater);
121
122 m_monitor.setTotalSteps(0);
123 qDebug();
124}
125
126void
128{
129 qDebug();
130 m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
131 writeHeader(p_msrunreader);
132
133 Translater translater(this);
134
135 translater.setReadAhead(m_isReadAhead);
136
138 // translater.setNeedMsLevelPeakList(1, false);
139 // translater.setNeedMsLevelPeakList(2, false);
140 p_msrunreader->readSpectrumCollection2(read_config, translater);
141
142 m_monitor.setTotalSteps(0);
143}
144
145void
147{
148
149 mpa_outputStream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance", "xsi");
150 // xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0"
151 // xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0
152 // http://sashimi.sourceforge.net/schema_revision/mzXML_2.0/mzXML_idx_2.0.xsd"
153 /*
154114 writer.setPrefix("xsi", xmlnsxsi);
155115 writer.setDefaultNamespace(namespaceURI);
156mpa_outputStream->writeStartElement("mzXML");
157117 writer.writeNamespace("xsi", xmlnsxsi);
158118 writer.writeDefaultNamespace(namespaceURI);
159119
160120 writer.writeAttribute(xmlnsxsi, "schemaLocation",
161xsischemaLocation); 121 */
162 mpa_outputStream->writeStartElement("mzXML");
163 mpa_outputStream->writeAttribute("xmlns",
164 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2");
165 mpa_outputStream->writeAttribute("xsi:schemaLocation",
166 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 "
167 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/"
168 "mzXML_idx_3.2.xsd");
169
170 mpa_outputStream->writeStartElement("msRun");
171 mpa_outputStream->writeAttribute("scanCount",
172 QString("%1").arg(p_msrunreader->spectrumListSize()));
173 //<msRun scanCount="16576" startTime="PT0.292553S" endTime="PT3000.34S">
174 // writer.writeAttribute("scanCount",
175 // ms_run.getSpectrumCount(this.controller).toString());
176
177 /*
178 * # < parentFile fileName = #
179 * "file://SEQUEST1/raw/vidal/20060411_VIDAL_JEAN_1_PEPCR1_42140.RAW" #
180 * fileType = "RAWData" fileSha1 = #
181 * "23c1620d4ad3f4f0103b0141b7caec1e8b7eebf5" / >
182 */
183 mpa_outputStream->writeStartElement("parentFile");
184 mpa_outputStream->writeAttribute("fileName", p_msrunreader->getMsRunId()->getFileName());
185 mpa_outputStream->writeAttribute("fileType", "RAWData");
186 mpa_outputStream->writeEndElement();
187 /*
188144
189145 MsInstrumentList instrument_list =
190ms_run.getMsInstruments(controller); 146 for (MsInstrument
191instrument : instrument_list) { 147 this.write(instrument); 148 }
192*/
193
194 mpa_outputStream->writeStartElement("msInstrument");
195 mpa_outputStream->writeAttribute("msInstrumentID", "1");
196 //<msManufacturer category="msManufacturer" value="Thermo Scientific"/>
197 mpa_outputStream->writeStartElement("msManufacturer");
198 mpa_outputStream->writeAttribute("category", "msManufacturer");
199 mpa_outputStream->writeAttribute("value", "unknown");
200 mpa_outputStream->writeEndElement();
201 //<msModel category="msModel" value="Q Exactive"/>
202 // <msIonisation category="msIonisation" value="nanoelectrospray"/>
203 // <msMassAnalyzer category="msMassAnalyzer" value="quadrupole"/>
204 // <msDetector category="msDetector" value="inductive detector"/>
205 // <software type="acquisition" name="Xcalibur"
206 // version="2.1-152001/2.1.0.1520"/>
207 mpa_outputStream->writeEndElement();
208 /*
209149
210150 // #< dataProcessing centroided ="1" >
211151 // my $ref_data_processings =
212$ms_run_description->dataProcessing(); 152 MsDataProcessingList
213dataProcList = ms_run.getMsDataProcessings(controller); 153 for
214(MsDataProcessing msDataProc : dataProcList) { 154 this.write(msDataProc); 155 }
215*/
216 mpa_outputStream->writeStartElement("dataProcessing");
217 //<dataProcessing centroided="1">
218 mpa_outputStream->writeAttribute("centroided", "1");
219 // <software type="conversion" name="ProteoWizard" version="3.0.3706"/>
220 mpa_outputStream->writeStartElement("software");
221 mpa_outputStream->writeAttribute("type", "conversion");
222 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
223 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
224 mpa_outputStream->writeEndElement();
225 //<processingOperation name="Conversion to mzML"/>
226 mpa_outputStream->writeStartElement("processingOperation");
227 mpa_outputStream->writeAttribute("name", "Conversion to mzXML");
228 //<software type="processing" name="ProteoWizard" version="3.0.3706"/>
229 mpa_outputStream->writeStartElement("software");
230 mpa_outputStream->writeAttribute("type", "processing");
231 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
232 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
233 mpa_outputStream->writeEndElement();
234 //<comment>Thermo/Xcalibur peak picking</comment>
235 mpa_outputStream->writeStartElement("comment");
236 mpa_outputStream->writeCharacters("pappso::MzxmlOutput");
237 mpa_outputStream->writeEndElement();
238 //</dataProcessing>
239 mpa_outputStream->writeEndElement();
240 mpa_outputStream->writeEndElement();
241 // Peaks
242}
243
244
245void
247{
248 mpa_outputStream->writeEndDocument();
249}
250
251
252std::size_t
253MzxmlOutput::getScanNumberFromNativeId(const QString &native_id) const
254{
255 QStringList native_id_list = native_id.split("=");
256 if(native_id_list.size() < 2)
257 {
258 }
259 else
260 {
261 return native_id_list.back().toULong();
262 }
263 return std::numeric_limits<std::size_t>::max();
264}
265
266std::size_t
268{
269 std::size_t scan_number = getScanNumberFromNativeId(spectrum.getMassSpectrumId().getNativeId());
270 if(scan_number == std::numeric_limits<std::size_t>::max())
271 {
272 scan_number = spectrum.getMassSpectrumId().getSpectrumIndex() + 1;
273 }
274 return scan_number;
275}
276
277std::size_t
279{
280
281 std::size_t scan_number = getScanNumberFromNativeId(spectrum.getPrecursorNativeId());
282 if(scan_number == std::numeric_limits<std::size_t>::max())
283 {
284 scan_number = spectrum.getPrecursorSpectrumIndex() + 1;
285 }
286 return scan_number;
287}
288
289void
291{
292 qDebug();
293 mpa_outputStream->writeStartElement("scan");
294 /*
295 <scan num="1"
296 scanType="Full"
297 centroided="1"
298 msLevel="1"
299 peaksCount="1552"
300 polarity="+"
301 retentionTime="PT0.292553S"
302 lowMz="400.153411865234"
303 highMz="1013.123352050781"
304 basePeakMz="445.12003"
305 basePeakIntensity="2.0422125e06"
306 totIonCurrent="1.737798e07">*/
307 mpa_outputStream->writeAttribute("num", QString("%1").arg(getScanNumber(spectrum)));
308 mpa_outputStream->writeAttribute("centroided", QString("1"));
309 mpa_outputStream->writeAttribute("msLevel", QString("%1").arg(spectrum.getMsLevel()));
310 if(spectrum.getMassSpectrumCstSPtr().get() == nullptr)
311 {
312 mpa_outputStream->writeAttribute("peaksCount", "0");
313 }
314 else
315 {
316 mpa_outputStream->writeAttribute("peaksCount", QString("%1").arg(spectrum.size()));
317
318 if(spectrum.size() > 0)
319 {
320 mpa_outputStream->writeAttribute(
321 "lowMz", QString::number(spectrum.getMassSpectrumCstSPtr().get()->front().x, 'f', 12));
322
323 mpa_outputStream->writeAttribute(
324 "highMz", QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x, 'f', 12));
325 // mpa_outputStream->writeAttribute("highMz",
326 // QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x,
327 // 'f', 10)); basePeakMz="245.1271988"
328 // basePeakIntensity="5810.7739"
329 // totIonCurrent="57803.815999999999">
330 }
331 }
332 mpa_outputStream->writeAttribute("polarity", "+");
333 mpa_outputStream->writeAttribute(
334 "retentionTime", QString("PT%1S").arg(QString::number(spectrum.getRtInSeconds(), 'f', 2)));
335
336 if(spectrum.getMsLevel() > 1)
337 {
338
339 //<precursorMz precursorScanNum="16574"
340 // precursorIntensity="58403.04296875" precursorCharge="2"
341 ////activationMethod="HCD">994.690619901808</precursorMz>
342 mpa_outputStream->writeStartElement("precursorMz");
343 mpa_outputStream->writeAttribute("precursorScanNum",
344 QString("%1").arg(getPrecursorScanNumber(spectrum)));
345 bool ok;
346 double precursor_intensity = spectrum.getPrecursorIntensity(&ok);
347 if(ok)
348 {
349 mpa_outputStream->writeAttribute("precursorIntensity",
350 QString::number(precursor_intensity, 'f', 4));
351 }
352 uint charge = spectrum.getPrecursorCharge(&ok);
353 if(ok)
354 {
355 mpa_outputStream->writeAttribute("precursorCharge", QString("%1").arg(charge));
356 }
357 double precursor_mz = spectrum.getPrecursorMz(&ok);
358 if(ok)
359 {
360 mpa_outputStream->writeCharacters(QString::number(precursor_mz, 'f', 12));
361 }
362 mpa_outputStream->writeEndElement();
363 }
364
365 /*<peaks compressionType="none"
366 compressedLen="0"
367 precision="64"
368 byteOrder="network"
369 contentType="m/z-int"></peaks>*/
370
371 mpa_outputStream->writeStartElement("peaks");
372 mpa_outputStream->writeAttribute("compressionType", "none");
373 mpa_outputStream->writeAttribute("compressedLen", "0");
374 mpa_outputStream->writeAttribute("precision", "64");
375 mpa_outputStream->writeAttribute("byteOrder", "network");
376 mpa_outputStream->writeAttribute("contentType", "m/z-int");
377
378 if((spectrum.getMassSpectrumCstSPtr().get() != nullptr) && (!spectrum.isEmptyMassSpectrum()))
379 {
380 QByteArray byte_array;
381 if(QSysInfo::ByteOrder == QSysInfo::LittleEndian)
382 {
383 for(const DataPoint &peak : *(spectrum.getMassSpectrumCstSPtr().get()))
384 {
385 double swap = change_endian(peak.x);
386 byte_array.append((char *)&swap, 8);
387 swap = change_endian(peak.y);
388 byte_array.append((char *)&swap, 8);
389 }
390 }
391 else
392 {
393 for(const DataPoint &peak : *(spectrum.getMassSpectrumCstSPtr().get()))
394 {
395 byte_array.append((char *)&peak.x, 8);
396 byte_array.append((char *)&peak.y, 8);
397 }
398 }
399 mpa_outputStream->writeCharacters(byte_array.toBase64());
400 }
401 mpa_outputStream->writeEndElement();
402
403 // scan
404 mpa_outputStream->writeEndElement();
405 qDebug();
406}
407
408void
410{
411 m_ms1IsMasked = mask_ms1;
412}
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
const QString & getFileName() const
Definition msrunid.cpp:161
void setNeedPeakList(bool need_peak_list)
void setMsLevels(std::vector< std::size_t > ms_levels)
Base class to read an MSrun. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition msrunreader.h:64
virtual std::size_t spectrumListSize() const =0
get the total number of spectra contained in the MSrun data file
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
const MsRunIdCstSPtr & getMsRunId() const
Translater(MzxmlOutput *p_mzxml_output)
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
void setReadAhead(bool read_ahead)
std::size_t getScanNumberFromNativeId(const QString &native_id) const
MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device)
void write(MsRunReader *p_msrunreader)
UiMonitorInterface & m_monitor
Definition mzxmloutput.h:91
std::size_t getScanNumber(const QualifiedMassSpectrum &spectrum) const
QXmlStreamWriter * mpa_outputStream
Definition mzxmloutput.h:92
void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)
void maskMs1(bool mask_ms1)
std::size_t getPrecursorScanNumber(const QualifiedMassSpectrum &spectrum) const
void writeHeader(MsRunReader *p_msrunreader)
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
uint getPrecursorCharge(bool *ok=nullptr) const
get precursor charge
const QString & getPrecursorNativeId() const
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
get precursor intensity
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
std::size_t getPrecursorSpectrumIndex() const
Get the scan number of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
#define PAPPSOMSPP_VERSION
Definition config.h:6
#define PAPPSOMSPP_NAME
Definition config.h:5
T change_endian(T in)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
unsigned int uint
Definition types.h:67