libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
binarydataarray.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/core/processing/cbor/mzcbor/binarydataarray.cpp
3 * \date 25/11/2025
4 * \author Olivier Langella
5 * \brief PSI BinaryDataArray object for mzML/mzCBOR
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28
29#include "binarydataarray.h"
30#include "cvparam.h"
32#include <qjsonarray.h>
33#include <qlogging.h>
34#include <zlib.h>
35
// Populates this object from one CBOR container read from `reader`.
// The declarator line is missing from this listing; the signature index at the
// end of the page gives it as: void fromCbor(CborStreamReader &reader).
//
// Keys handled: "bits" -> bits, "isInt" -> isInt, "unit" -> unit,
// "compress" -> compress, "data" -> byteArray.
// Unknown keys, non-string items and strings that fail to decode are skipped
// with reader.next().
36void
38{
39 QString txt_value;
40 reader.enterContainer();
41 // qDebug() << txt_value;
  // Walk the container until it is exhausted or the reader becomes invalid.
42 while(reader.hasNext() && (!reader.isInvalid()))
43 {
44 if(reader.isString())
45 {
46 if(reader.decodeString(txt_value))
47 {
48 // qDebug() << txt_value;
49 if(txt_value == "bits")
50 {
51 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
52 bits = reader.toUnsignedInteger();
53 reader.next();
54 }
55 else if(txt_value == "isInt")
56 {
57 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
58 isInt = reader.toBool();
59 reader.next();
60 }
61 else if(txt_value == "unit")
62 {
63 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
  // txt_value is reused as the output buffer for the value string.
  // NOTE(review): the result of decodeString is not checked here; on failure
  // `unit` would receive whatever txt_value holds afterwards - confirm.
64 reader.decodeString(txt_value);
65 unit = txt_value;
66 }
67 else if(txt_value == "compress")
68 {
69 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
  // Same pattern as "unit": the decodeString result is not checked.
70 reader.decodeString(txt_value);
71 compress = txt_value;
72 }
73 else if(txt_value == "data")
74 {
75 // cvParamMap = CvParam::getCvParamsMapFromCbor(reader);
76 // reader.next();
77 // qDebug() << reader.type();
  // The byte string is read chunk by chunk; every chunk returned with status
  // Ok is appended to byteArray.
  // NOTE(review): byteArray is appended to and never cleared beforehand, so
  // calling this on an object that already holds data would concatenate -
  // confirm it is only used on fresh objects.
78 auto r = reader.readByteArray();
79 while(r.status == QCborStreamReader::Ok)
80 {
81 byteArray += r.data;
82 r = reader.readByteArray();
83 }
84
85 if(r.status == QCborStreamReader::Error)
86 {
87 // handle error condition
88 // qDebug() << "error";
  // A read error discards everything accumulated so far; nothing is reported
  // to the caller from here.
89 byteArray.clear();
90 }
91 }
92 else
93 {
  // Unknown key: skip the item the reader is now positioned on.
94 reader.next();
95 }
96 }
97 else
98 {
  // The string could not be decoded: skip it.
99 reader.next();
100 }
101 }
102 else
103 {
  // Not a string: skip it.
104 reader.next();
105 }
106 }
107 reader.leaveContainer();
108}
109
// Serialises this object to `writer` as a single CBOR map.
// The declarator line is missing from this listing; the signature index at the
// end of the page gives it as: void toCbor(CborStreamWriter &writer).
//
// Entries are written in this order: "unit", "bits", "isInt", "compress",
// "data" (the raw byteArray).
110void
112{
113
114 writer.startMap();
115 writer.append("unit");
116 writer.append(unit);
117
118 writer.append("bits");
119 writer.append(bits);
120 writer.append("isInt");
121 writer.append(isInt);
122
123 writer.append("compress");
124 writer.append(compress);
125
126 writer.append("data");
127 writer.append(byteArray);
128 writer.endMap();
129}
130
// Populates this object from an mzML binaryDataArray element.
// The declarator line is missing from this listing; the signature index at the
// end of the page gives it as: void fromMzml(QXmlStreamReader &reader).
//
// - The "encodedLength" attribute is read and only used for a sanity warning.
// - Each cvParam child is mapped by accession:
//     MS:1000523 -> bits 64, float      MS:1000521 -> bits 32, float
//     MS:1000522 -> bits 64, integer    MS:1000519 -> bits 32, integer
//     MS:1000574 -> compress "zlib"     MS:1000576 -> compress "none"
//     MS:1000514, MS:1000515, MS:1000595, MS:1000786 -> unit = accession
//   Any other accession raises an error on the reader.
// - The text of the binary child is base64-decoded into byteArray.
// - Any other child element is skipped.
131void
133{
134
135 qDebug();
136 //<binaryDataArray encodedLength="6380">
137 std::size_t encodedLength = reader.attributes().value("encodedLength").toULongLong();
138 qDebug() << "encodedLength=" << encodedLength;
139 while(reader.readNext() && !reader.isEndElement())
140 {
141 if(reader.isStartElement())
142 {
143 if(reader.name().toString() == "cvParam")
144 {
145 QString accession = reader.attributes().value("accession").toString();
146
147 qDebug() << "accession=" << accession;
148 //<cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
149 if(accession == "MS:1000523")
150 {
151 bits = 64;
152 isInt = false;
153 }
154 else if(accession == "MS:1000519")
155 {
156 /*
157 *
158[Term]
159id: MS:1000519
160name: 32-bit integer
161def: "Signed 32-bit little-endian integer." [PSI:MS]
162is_a: MS:1000518 ! binary data type
163*/
164 bits = 32;
165 isInt = true;
166 }
167 else if(accession == "MS:1000521")
168 {
169 /*
170 [Term]
171 id: MS:1000521
172 name: 32-bit float
173 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
174 is_a: MS:1000518 ! binary data type
175 */
176 bits = 32;
177 isInt = false;
178 }
179 else if(accession == "MS:1000522")
180 {
181
182 /*
183 [Term]
184 id: MS:1000522
185 name: 64-bit integer
186 def: "Signed 64-bit little-endian integer." [PSI:MS]
187 is_a: MS:1000518 ! binary data type*/
188 bits = 64;
189 isInt = true;
190 }
191
192 //<cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
193 else if(accession == "MS:1000574")
194 {
195 compress = "zlib";
196 }
197 else if(accession == "MS:1000576")
198 {
199 /*
200 [Term]
201 id: MS:1000576
202 name: no compression
203 def: "No Compression." [PSI:MS]
204 is_a: MS:1000572 ! binary data compression type
205 */
206 compress = "none";
207 }
208
  // Array-kind terms: the accession itself is stored in `unit`.
  // The first one, MS:1000515, is the accession the sample cvParam comment in
  // this file names "intensity array".
209 else if(accession == "MS:1000515")
210 {
211 unit = accession;
212 }
213 else if(accession == "MS:1000514")
214 {
215 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
216 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
217
218 unit = accession;
219 }
220 else if(accession == "MS:1000595")
221 {
222 // <cvParam cvRef="MS" accession="MS:1000595" name="time array" value=""
223 // unitCvRef="UO" unitAccession="UO:0000031" unitName="minute"/>
224
225 unit = accession;
226 }
227 else if(accession == "MS:1000786")
228 {
229 // <cvParam cvRef="MS" accession="MS:1000786" name="non-standard data
230 // array" value="ms level" unitCvRef="UO" unitAccession="UO:0000186"
231 // unitName="dimensionless unit"/>
232 //
233 unit = accession;
234 }
235 else
236 {
  // Unknown accession: the error is recorded on the XML reader.
  // NOTE(review): listing line 240 is absent from this page; the second
  // QObject::tr(...) expression below is presumably the argument of the
  // statement that started on that line (possibly a throw) - verify against
  // the real source file.
237 reader.raiseError(
238 QObject::tr("cvParam accession %1 is not known in binaryDataArray")
239 .arg(accession));
241 QObject::tr("cvParam accession %1 is not known in binaryDataArray")
242 .arg(accession));
243 }
244 reader.skipCurrentElement();
245 }
246 else if(reader.name().toString() == "binary")
247 {
248
  // Consume the tokens inside the binary element up to its end tag.
249 while(reader.readNext() && !reader.isEndElement())
250 {
251 if(reader.isCharacters())
252 {
253 // clean content:
254 QStringView content = reader.text().trimmed();
  // Text that is exactly one newline, or a newline followed by one tab, is
  // ignored outright; `content` is already trimmed, so such text would also be
  // rejected by the isEmpty() test below.
255 if((reader.text().toString() == "\n") || (reader.text().toString() == "\n\t"))
256 {
257 }
258 else
259 {
260 // text node
261 if(!content.isEmpty())
262 {
263 // qDebug() << "text isCharacters" << content.mid(0, 10);
264
  // NOTE(review): this compares the untrimmed text length with encodedLength,
  // whereas the decode below works on the trimmed text; payloads surrounded
  // by whitespace will therefore log this warning. The mismatch is only
  // logged, decoding proceeds regardless.
265 if((std::size_t)reader.text().size() != encodedLength)
266 {
267 qWarning() << "reader.text().size() != encodedLength"
268 << reader.text().size() << " " << encodedLength;
269 }
270
271 // mp_cborWriter->append("@text@");
272 // mp_cborWriter->append(content);
  // Replaces (does not append to) byteArray with the decoded bytes; if several
  // character tokens are delivered, only the last non-empty one is kept.
273 byteArray = byteArray.fromBase64(reader.text().trimmed().toLatin1());
274 }
275 }
276 }
277 }
278 }
279 else
280 {
  // Any child other than cvParam / binary is skipped entirely.
281 reader.skipCurrentElement();
282 }
283 }
284 }
285 qDebug();
286}
287
288
// Writes this object to `writer` as an mzML binaryDataArray element.
// The declarator line is missing from this listing; the signature index at the
// end of the page gives it as: void toMzml(QXmlStreamWriter &writer).
//
// Output, in order:
//   1. start element with an "encodedLength" attribute equal to the size of
//      the base64 encoding of byteArray;
//   2. one cvParam describing the array kind, selected by `unit`;
//   3. one cvParam describing the numeric type, selected by isInt and bits;
//   4. one cvParam describing the compression, selected by `compress`;
//   5. a binary text element holding the base64 data; end element.
289void
291{
292 //<binaryDataArray encodedLength="1152">
293 writer.writeStartElement("binaryDataArray");
294 auto base64 = byteArray.toBase64();
295 writer.writeAttribute("encodedLength", QString("%1").arg(base64.size()));
296 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
297 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
  // A single CvParam object is reused for every cvParam written below.
298 CvParam cv_param;
299 cv_param.cvRef = "MS";
300
301 if(unit == "MS:1000514")
302 {
303 cv_param.accession = unit;
304 cv_param.name = "m/z array";
305 cv_param.unitCvRef = "MS";
306 cv_param.unitAccession = "MS:1000040";
307 cv_param.unitName = "m/z";
308 cv_param.setValue("");
309 cv_param.toMzml(writer);
310 }
311 else if(unit == "MS:1000515")
312 {
313 //<cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
314 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
315
316 cv_param.accession = unit;
317 cv_param.name = "intensity array";
318 cv_param.unitCvRef = "MS";
319 cv_param.unitAccession = "MS:1000131";
320 cv_param.unitName = "number of counts";
321 cv_param.setValue("");
322 cv_param.toMzml(writer);
323 }
324 else if(unit == "MS:1000595")
325 {
326 // <cvParam cvRef="MS" accession="MS:1000595" name="time array" value=""
327 // unitCvRef="UO" unitAccession="UO:0000031" unitName="minute"/>
328
329 cv_param.accession = unit;
330 cv_param.name = "time array";
331 cv_param.unitCvRef = "UO";
332 cv_param.unitAccession = "UO:0000031";
333 cv_param.unitName = "minute";
334 cv_param.setValue("");
335 cv_param.toMzml(writer);
336 }
337 else if(unit == "MS:1000786")
338 {
339 // <cvParam cvRef="MS" accession="MS:1000786" name="non-standard data
340 // array" value="ms level" unitCvRef="UO" unitAccession="UO:0000186"
341 // unitName="dimensionless unit"/>
342 //
  // NOTE(review): the value is hard-coded to "ms level" for every
  // non-standard array; the read side in this file stores only the accession,
  // so any other original value cannot be restored here - confirm intended.
343 cv_param.accession = unit;
344 cv_param.name = "non-standard data array";
345 cv_param.unitCvRef = "UO";
346 cv_param.unitAccession = "UO:0000186";
347 cv_param.unitName = "dimensionless unit";
348 cv_param.setValue("ms level");
349 cv_param.toMzml(writer);
350 }
351 else
352 {
  // NOTE(review): listing line 353 is absent from this page; the expression
  // below is presumably the argument of the statement that started on that
  // line (possibly a throw for an unknown unit) - verify against the real
  // source file.
354 QObject::tr("unit accession %1 is not known in binaryDataArray").arg(unit));
355 }
356
357 // <cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />
358
  // Reset the unit-related fields and the value so they do not leak into the
  // data-type and compression cvParams written next.
359 cv_param.unitCvRef.clear();
360 cv_param.unitAccession.clear();
361 cv_param.unitName.clear();
362 cv_param.setValue("");
  // Numeric type: only 32 and 64 bits are handled; for any other `bits` value
  // no data-type cvParam is written at all.
363 if(isInt)
364 {
365 /*
366 id: MS:1000519
367 name: 32-bit integer
368 def: "Signed 32-bit little-endian integer." [PSI:MS]
369 is_a: MS:1000518 ! binary data type*/
370 if(bits == 32)
371 {
372 cv_param.accession = "MS:1000519";
373 cv_param.name = "32-bit integer";
374 cv_param.toMzml(writer);
375 }
376 else if(bits == 64)
377 {
378 /*
379 [Term]
380 id: MS:1000522
381 name: 64-bit integer
382 def: "Signed 64-bit little-endian integer." [PSI:MS]
383 is_a: MS:1000518 ! binary data type*/
384 cv_param.accession = "MS:1000522";
385 cv_param.name = "64-bit integer";
386 cv_param.toMzml(writer);
387 }
388 }
389 else
390 {
391 if(bits == 64)
392 {
393 cv_param.accession = "MS:1000523";
394 cv_param.name = "64-bit float";
395 cv_param.toMzml(writer);
396 }
397 else if(bits == 32)
398 {
399 /*
400 [Term]
401 id: MS:1000521
402 name: 32-bit float
403 def: "32-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
404 is_a: MS:1000518 ! binary data type
405 */
406 cv_param.accession = "MS:1000521";
407 cv_param.name = "32-bit float";
408 cv_param.toMzml(writer);
409 }
410 }
411 // <cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />
412 /*
413
414[Term]
415id: MS:1000520
416name: 16-bit float
417def: "OBSOLETE Signed 16-bit float." [PSI:MS]
418is_a: MS:1000518 ! binary data type
419is_obsolete: true
420
421
422[Term]
423id: MS:1000523
424name: 64-bit float
425def: "64-bit precision little-endian floating point conforming to IEEE-754." [PSI:MS]
426is_a: MS:1000518 ! binary data type
427*/
428
  // Compression: any `compress` value other than "zlib" or "none" results in
  // no compression cvParam being written.
429 if(compress == "zlib")
430 {
431 cv_param.accession = "MS:1000574";
432 cv_param.name = "zlib compression";
433 cv_param.toMzml(writer);
434 }
435 else if(compress == "none")
436 { /*
437[Term]
438id: MS:1000576
439name: no compression
440def: "No Compression." [PSI:MS]
441is_a: MS:1000572 ! binary data compression type
442*/
443 cv_param.accession = "MS:1000576";
444 cv_param.name = "no compression";
445 cv_param.toMzml(writer);
446 }
447
448 // <binary>eJwl0W9oW1U
449 // writer.writeStartElement("binary");
  // The base64 buffer computed at the top is reused, so the encodedLength
  // attribute and the written payload come from the same encoding.
450 writer.writeTextElement("binary", base64);
451 // </binary>
452 // writer.writeEndElement();
453 // </binaryDataArray>
454 writer.writeEndElement();
455}
456
457
// Converts the raw bytes held in byteArray into doubles stored in double_list.
// The first declarator line is missing from this listing; the signature index
// at the end of the page gives the full signature as:
//   void decodeVector(std::size_t estimated_length,
//                     std::vector<double> &double_list) const
//
// estimated_length: expected number of elements; it is only used to size the
//                   zlib output buffer (estimated_length * bytes per element).
// double_list:      output; resized to the number of decoded elements.
// Throws pappso::PappsoException when zlib's uncompress does not return Z_OK.
//
// Element layout is chosen from `bits` (32 -> 4 bytes, anything else -> 8
// bytes) and `isInt` (signed integer versus floating point).
458void
460 std::vector<double> &double_list) const
461{
462 qDebug();
463 int size_in_byte = 8;
464 if(bits == 32)
465 {
466 size_in_byte = 4;
467 }
468
469 qDebug();
470 // if(result.decodingStatus == QByteArray::Base64DecodingStatus::Ok)
471 // { // Allocate buffer for decompressed data
472 if(compress == "zlib")
473 {
474 qDebug();
475 std::vector<unsigned char> data_heap;
  // NOTE(review): the output buffer is sized purely from the caller's
  // estimate. uncompress fails when the buffer is too small, so an estimate
  // below the real element count ends in the exception thrown below -
  // confirm callers always pass an upper bound.
476 uLongf decompressedSize = estimated_length * size_in_byte; // Estimate size
477 data_heap.resize(decompressedSize);
478
479 // Decompress the data
480 int result_zlib = uncompress(
481 data_heap.data(), &decompressedSize, (Bytef *)byteArray.constData(), byteArray.size());
482
483 if(result_zlib != Z_OK)
484 {
485 throw pappso::PappsoException(QObject::tr("Decompression failed: %1").arg(result_zlib));
486 }
487
488 // Resize the vector to the actual decompressed size
489 data_heap.resize(decompressedSize);
490 double_list.resize(decompressedSize / size_in_byte);
491
492
493 // double *double_ptr = (double *)&decompressedData[0];
  // NOTE(review): double_list was sized with integer division, but this loop
  // runs once per started element. If the decompressed byte count is not a
  // multiple of size_in_byte, the final iteration reads beyond data_heap and
  // writes beyond double_list - confirm input is always whole elements.
  // NOTE(review): values are read through casted pointers with no byte
  // swapping, i.e. they are interpreted in host byte order.
494 std::size_t j = 0;
495 for(std::size_t i = 0; i < data_heap.size(); i += size_in_byte)
496 {
497 if(bits == 32)
498 {
499 if(isInt)
500 {
501 double_list[j] = *(std::int32_t *)&data_heap[i];
502 }
503 else
504 {
505 double_list[j] = *(std::float_t *)&data_heap[i];
506 }
507 }
508 else
509 {
510 if(isInt)
511 {
512 double_list[j] = *(std::int64_t *)&data_heap[i];
513 }
514 else
515 {
516 double_list[j] = *(double *)&data_heap[i];
517 }
518 }
519 // double_ptr++;
520 j++;
521 }
522 }
523 else if(compress == "none")
524 {
525
526 qDebug() << size_in_byte << " " << byteArray.size();
527
  // Uncompressed data: byteArray is interpreted in place; estimated_length is
  // not used on this path.
528 double_list.resize(byteArray.size() / size_in_byte);
529 // double *double_ptr = (double *)&decompressedData[0];
  // NOTE(review): same partial-trailing-element hazard as the zlib path when
  // byteArray.size() is not a multiple of size_in_byte.
  // NOTE(review): the loop emits qDebug output for every element (and twice
  // for 64-bit floats) - presumably leftover tracing.
530 std::size_t j = 0;
531 for(std::size_t i = 0; i < (std::size_t)byteArray.size(); i += size_in_byte)
532 {
533 qDebug() << " i=" << i;
534 if(bits == 32)
535 {
536 if(isInt)
537 {
538 double_list[j] = *(std::int32_t *)&byteArray.constData()[i];
539 }
540 else
541 {
542 double_list[j] = *(std::float_t *)&byteArray.constData()[i];
543 }
544 }
545 else
546 {
547 if(isInt)
548 {
549 double_list[j] = *(std::int64_t *)&byteArray.constData()[i];
550 }
551 else
552 {
553 qDebug();
554 double_list[j] = *(double *)&byteArray.constData()[i];
555 }
556 }
557 // double_ptr++;
558 j++;
559 }
560 }
  // Any other `compress` value falls through both branches: double_list is
  // left exactly as the caller passed it and no error is raised.
561
562
563 qDebug();
564 // std::vector<double> v(decompressedData.cbegin(), decompressedData.cend());
565 // qDebug() << j << " " << double_list.size();
566}
567
// Predicate on `unit`: true when it equals the CV accession MS:1000515, which
// the sample cvParam below names "intensity array".
// The declarator line (listing line 569) is missing from this page, so the
// function's actual name is not visible here.
568bool
570{
571 // <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
572 // unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />
573
574 return unit == "MS:1000515";
575}
576
// Predicate on `unit`: true when it equals the CV accession MS:1000514, which
// the sample cvParam below names "m/z array".
// The declarator line (listing line 578) is missing from this page, so the
// function's actual name is not visible here.
577bool
579{
580 // <cvParam cvRef="MS" accession="MS:1000514" value="" name="m/z array"
581 // unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
582
583 return unit == "MS:1000514";
584}
585
// Builds a JSON representation of this object.
// The declarator line is missing from this listing; the signature index at the
// end of the page gives it as:
//   QJsonObject toJsonObject(std::size_t estimated_length) const
//
// The returned object has two entries: "unit" (the unit accession string) and
// "array" (the decoded values as a JSON array of numbers).
// estimated_length is forwarded unchanged to decodeVector, so any exception
// thrown by decodeVector propagates out of this function.
586QJsonObject
588{
589 QJsonObject binary_data;
590 binary_data.insert("unit", unit);
591 std::vector<double> double_list;
592 decodeVector(estimated_length, double_list);
593
594 QJsonArray double_array;
595
  // Copy the decoded doubles one by one into the JSON array.
596 for(auto &value : double_list)
597 {
598 double_array.append(value);
599 }
600
601 binary_data.insert("array", double_array);
602 return binary_data;
603}
PSI BinaryDataArray object for mzML/mzCBOR.
Simple override of the raw QCborStreamReader. This adds convenient functions to put CBOR data into C++...
bool decodeString(QString &the_str)
Decode the current CBOR value as a string, then point to the next value. The current value is decoded as...
overrides QCborStreamWriter base class to provide convenient functions
PSI cvParam object for mzML/mzCBOR.
void fromCbor(CborStreamReader &reader)
void fromMzml(QXmlStreamReader &reader)
void toMzml(QXmlStreamWriter &writer)
void toCbor(CborStreamWriter &writer)
void decodeVector(std::size_t estimated_length, std::vector< double > &double_list) const
QJsonObject toJsonObject(std::size_t estimated_length) const
Write the structure to a JSON object (needs to decode the binary array).
void setValue(const QString &value_str)
Definition cvparam.cpp:224
void toMzml(QXmlStreamWriter &writer)
Definition cvparam.cpp:232