libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfeaturesscan.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/evalscan/psmfeaturesscan.cpp
3 * \date 15/07/2025
4 * \author Olivier Langella
5 * \brief compute features for each PSM of a scan
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfeaturesscan.h"
29#include <QCborArray>
30#include <QCborMap>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
43 pappso::XtandemSpectrumProcess &tandem_spectrum_process,
44 std::list<Enums::PeptideIon> &ion_list,
45 pappso::PsmFeatures &psm_features,
46 pappso::PrecisionPtr fragment_tolerance)
47 : CborScanMapBase(psm_file_scan_process),
48 m_tandemSpectrumProcess(tandem_spectrum_process),
49 m_ionList(ion_list),
50 m_psmFeatures(psm_features)
51{
52 m_fragmentTolerance = fragment_tolerance;
53}
54
58
59double
60PsmFeaturesScan::checkInf(double input) const
61{
62 if(input < 0)
63 return 0;
64 return input;
65}
66
67void
69{
70
71 if(keys().contains("psm_list"))
72 {
74
75 pappso::MassSpectrum spectrum =
76 m_tandemSpectrumProcess.process(*qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
77 qualified_mass_spectrum.get()->getPrecursorMz(),
78 qualified_mass_spectrum.get()->getPrecursorCharge());
79
80 // qWarning() << qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
81 QCborArray new_psm_arr;
82 for(QCborValue cbor_psm : value("psm_list").toArray())
83 {
84 QCborMap cbor_psm_map = cbor_psm.toMap();
85 QCborMap cbor_psm_features;
86 pappso::PeptideSp peptide_sp =
87 pappso::PeptideProFormaParser::parseString(cbor_psm_map.value("proforma").toString());
88
89 if(peptide_sp.get()->size() < 2)
90 {
91 throw pappso::PappsoException(QObject::tr("peptide in psm too small %1")
92 .arg(cbor_psm_map.value("proforma").toString()));
93 }
94 // qWarning() << cbor_psm_map.value("proforma").toString() << "end " <<
95 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
96
97 std::size_t peptide_size = peptide_sp.get()->size();
98 cbor_psm_features.insert(QString("peptide_size"), (unsigned int)peptide_size);
99
100 pappso::XtandemHyperscore hyperscore(spectrum,
101 peptide_sp,
102 qualified_mass_spectrum.get()->getPrecursorCharge(),
104 m_ionList,
105 true);
106 cbor_psm_features.insert(QString("hyperscore"), QCborValue(hyperscore.getHyperscore()));
107 // qWarning() << cbor_psm_map.value("proforma").toString() << "end2 " <<
108 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
109
110 m_psmFeatures.setPeptideSpectrumCharge(
111 peptide_sp,
112 qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
113 qualified_mass_spectrum.get()->getPrecursorCharge(),
114 2);
115 // TIC
116 cbor_psm_features.insert(QString("total_intensity"),
117 std::log(m_psmFeatures.getTotalIntensity()));
118 // MaxIntALL
119 cbor_psm_features.insert(QString("max_intensity"),
120 checkInf(std::log(qualified_mass_spectrum.get()
121 ->getMassSpectrumSPtr()
122 .get()
123 ->maxIntensityDataPoint()
124 .y)));
125
126 // qWarning() << cbor_psm_map.value("proforma").toString() << "end3 " <<
127 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
128 // MaxYionInt
129 cbor_psm_features.insert(
130 QString("MaxYionInt"),
131 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(Enums::PeptideIon::y))));
132
133 // MaxBionInt
134 cbor_psm_features.insert(
135 QString("MaxBionInt"),
136 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(Enums::PeptideIon::b))));
137
138 // SumYmatchInt
139 cbor_psm_features.insert(
140 QString("SumYmatchInt"),
141 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::y))));
142
143 // SumBmatchInt
144 cbor_psm_features.insert(
145 QString("SumBmatchInt"),
146 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::b))));
147
148 // FracYmatchInt
149 cbor_psm_features.insert(
150 QString("FracYmatchInt"),
151 checkInf(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::y) /
152 m_psmFeatures.getTotalIntensity()));
153 // FracBmatchInt
154 cbor_psm_features.insert(
155 QString("FracBmatchInt"),
156 checkInf(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::b) /
157 m_psmFeatures.getTotalIntensity()));
158
159 // SeqCoverYion
160 cbor_psm_features.insert(
161 QString("SeqCoverYion"),
162 (double)m_psmFeatures.getAaSequenceCoverage(Enums::PeptideIon::y) /
163 (double)peptide_size);
164 // SeqCoverBion
165 cbor_psm_features.insert(
166 QString("SeqCoverBion"),
167 (double)m_psmFeatures.getAaSequenceCoverage(Enums::PeptideIon::b) /
168 (double)peptide_size);
169
170
171 // ConsecutiveYion
172 cbor_psm_features.insert(
173 QString("ConsecutiveYion"),
174 (qint64)m_psmFeatures.getMaxConsecutiveIon(Enums::PeptideIon::y));
175 // ConsecutiveBion
176 cbor_psm_features.insert(
177 QString("ConsecutiveBion"),
178 (qint64)m_psmFeatures.getMaxConsecutiveIon(Enums::PeptideIon::b));
179
180 // MassErrMean
181 cbor_psm_features.insert(QString("MassErrMean"), m_psmFeatures.getMatchedMzDiffMean());
182
183 // MassErrSD
184 cbor_psm_features.insert(QString("MassErrSD"), m_psmFeatures.getMatchedMzDiffSd());
185
186 // NumofAnnoPeaks
187 cbor_psm_features.insert(QString("NumofAnnoPeaks"),
188 (unsigned int)m_psmFeatures.getNumberOfMatchedIons());
189
190 // qWarning() << cbor_psm_map.value("proforma").toString() << "end2 " <<
191 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
192 // NumofComplementPeaks
193 std::size_t num_of_pairs = m_psmFeatures.countMatchedIonComplementPairs();
194 cbor_psm_features.insert(QString("NumofComplementPeaks"), (unsigned int)num_of_pairs);
195 if(num_of_pairs > 0)
196 {
197 // SumComplementPeaksInt
198 cbor_psm_features.insert(
199 QString("SumComplementPeaksInt"),
200 std::log(m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs()));
201
202 // FracComplementPeaksInt
203 cbor_psm_features.insert(
204 QString("FracComplementPeaksInt"),
205 m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs() /
206 m_psmFeatures.getTotalIntensity());
207 // SeqCoverComplementPeaks
208 cbor_psm_features.insert(
209 QString("SeqCoverComplementPeaks"),
210 (double)m_psmFeatures.getComplementPairsAaSequenceCoverage() /
211 (double)peptide_size);
212 }
213 pappso::LinearRegression lr = m_psmFeatures.getIonIsotopeLinearRegression();
214 cbor_psm_features.insert(QString("lrSize"), (unsigned int)lr.getSize());
215
216
217 double coeff_of_determination = lr.getCoefficientOfDetermination();
218 if(std::isnan(coeff_of_determination))
219 {
220 }
221 else
222 {
223 cbor_psm_features.insert(QString("lrCoeffDet"), coeff_of_determination);
224 }
225
226
227 QCborMap psm_eval = cbor_psm_map.value("eval").toMap();
228 psm_eval.remove(QString("features"));
229 psm_eval.insert(QString("features"), cbor_psm_features);
230 cbor_psm_map.remove(QString("eval"));
231 cbor_psm_map.insert(QString("eval"), psm_eval);
232
233 new_psm_arr.push_back(cbor_psm_map);
234 }
235
236 insert(QString("psm_list"), new_psm_arr);
237 // qWarning() << "end " <<
238 // qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();
239 }
240}
241
242} // namespace psm
243} // namespace cbor
244} // namespace pappso
std::size_t getSize() const
get data size
double getCoefficientOfDetermination() const
get Coefficient of determination (R2)
Class to represent a mass spectrum.
static PeptideSp parseString(const QString &pepstr)
pappso_double getHyperscore() const
CborScanMapBase(const PsmFileScanProcess &psm_file_scan_process)
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
PsmFeaturesScan(const PsmFileScanProcess &psm_file_scan_process, pappso::XtandemSpectrumProcess &tandem_spectrum_process, std::list< pappso::Enums::PeptideIon > &ion_list, pappso::PsmFeatures &psm_features, pappso::PrecisionPtr fragment_tolerance)
pappso::XtandemSpectrumProcess & m_tandemSpectrumProcess
std::list< pappso::Enums::PeptideIon > & m_ionList
double checkInf(double input) const
pappso::PrecisionPtr m_fragmentTolerance
Basic PSM file reader to process scan (parallelized scan processing).
@ y
Cter amino ions.
Definition types.h:295
@ b
Nter acylium ions.
Definition types.h:287
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
const PrecisionBase * PrecisionPtr
Definition precision.h:122