IsoSpec
Loading...
Searching...
No Matches
marginalTrek++.h
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17#pragma once
18
19#include <queue>
20#include <algorithm>
21#include <vector>
22#include <functional>
23#include <utility>
24#include <memory>
25#include "conf.h"
26#include "allocator.h"
27#include "operators.h"
28#include "summator.h"
29#include "pod_vector.h"
30
31
32namespace IsoSpec
33{
34
36
44{
45 private:
46 bool disowned;
47 protected:
48 const unsigned int isotopeNo;
49 const unsigned int atomCnt;
50 const double* const atom_lProbs;
51 const double* const atom_masses;
52 const double loggamma_nominator;
53 Conf mode_conf;
54 double mode_lprob;
55
56
57 public:
59
67 const double* _masses, // masses size = logProbs size = isotopeNo
68 const double* _probs,
69 int _isotopeNo,
70 int _atomCnt
71 );
72
73 // Get rid of the C++ generated assignment constructor.
74 Marginal& operator= (const Marginal& other) = delete;
75
77 Marginal(const Marginal& other);
78
80 Marginal(Marginal&& other);
81
83 virtual ~Marginal();
84
86
89 inline int get_isotopeNo() const { return isotopeNo; }
90
91 inline const double* get_lProbs() const { return atom_lProbs; }
92
94
97 double getLightestConfMass() const;
98
100
103 double getHeaviestConfMass() const;
104
106
110 double getMonoisotopicConfMass() const;
111
113
116 size_t getMonoisotopicAtomIndex() const;
117
119
122
123 inline double getModeMass() { ensureModeConf(); return calc_mass(mode_conf, atom_masses, isotopeNo); }
124
126
129 inline double getModeLProb() { ensureModeConf(); return mode_lprob; }
130
132 inline double fastGetModeLProb() { return mode_lprob; }
133
135
138// inline double getModeProb() const { return exp(getModeLProb()); }
139
141 Conf computeModeConf() const;
142
144
147 inline double getSmallestLProb() const { return atomCnt * *std::min_element(atom_lProbs, atom_lProbs+isotopeNo); }
148
150
153 double getAtomAverageMass() const;
154
156
159 inline double getTheoreticalAverageMass() const { return getAtomAverageMass() * atomCnt; }
160
162
166 protected:
167 ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const { double ret = 0.0; for(size_t ii = 0; ii < isotopeNo; ii++) ret += minuslogFactorial(conf[ii]) + conf[ii] * atom_lProbs[ii]; return ret; }
168 ISOSPEC_FORCE_INLINE double logProb(Conf conf) const { return loggamma_nominator + unnormalized_logProb(conf); }
169 public:
171 double variance() const;
172
174 double getLogSizeEstimate(double logEllipsoidRadius) const;
175
176 inline void ensureModeConf() { if (mode_conf == nullptr) setupMode(); }
177 private:
178 void setupMode();
179};
180
181
183class MarginalTrek : public Marginal
184{
185 private:
186 int current_count;
187 const ConfOrderMarginal orderMarginal;
188 std::priority_queue<ProbAndConfPtr, pod_vector<ProbAndConfPtr> > pq;
190 Allocator<int> allocator;
191 pod_vector<double> _conf_lprobs;
192 pod_vector<double> _conf_masses;
193 pod_vector<int*> _confs;
194
195 const double min_lprob;
196 size_t current_bucket;
197 size_t initialized_until;
198
200 bool add_next_conf();
201
202 size_t bucket_no(double lprob) { return static_cast<size_t>( (mode_lprob - lprob) * 100.0 ); }
203
204 public:
206
211 Marginal&& m,
212 int tabSize = 1000,
213 int hashSize = 1000
214 ); // NOLINT(runtime/explicit) - Constructor deliberately left usable as a conversion.
215
216 MarginalTrek(const MarginalTrek& other) = delete;
217 MarginalTrek& operator=(const MarginalTrek& other) = delete;
218
220
226 inline bool probeConfigurationIdx(int idx)
227 {
228 while(current_count <= idx)
229 if(!add_next_conf())
230 return false;
231 return true;
232 }
233
235
238 inline double getModeLProb() const { return mode_lprob; }
239
240
241 inline const pod_vector<double>& conf_lprobs() const { return _conf_lprobs; }
242 inline const pod_vector<double>& conf_masses() const { return _conf_masses; }
243 inline const pod_vector<Conf>& confs() const { return _confs; }
244
245
246 virtual ~MarginalTrek();
247};
248
249
251
260{
261 protected:
262 pod_vector<Conf> configurations;
263 Conf* confs;
264 unsigned int no_confs;
265 double* masses;
266 pod_vector<double> lProbs;
267 double* probs;
268 Allocator<int> allocator;
269 public:
271
279 Marginal&& m,
280 double lCutOff,
281 bool sort = true,
282 int tabSize = 1000,
283 int hashSize = 1000
284 );
285
286 PrecalculatedMarginal(const PrecalculatedMarginal& other) = delete;
287 PrecalculatedMarginal& operator=(const PrecalculatedMarginal& other) = delete;
288
290 virtual ~PrecalculatedMarginal();
291
293
296 inline bool inRange(unsigned int idx) const { return idx < no_confs; }
297
299
303 inline const double& get_lProb(int idx) const { return lProbs[idx]; }
304
306
310 inline const double& get_prob(int idx) const { return probs[idx]; }
311
313
317 inline const double& get_mass(int idx) const { return masses[idx]; }
318
320
323 inline const double* get_lProbs_ptr() const { return lProbs.data(); }
324
326
329 inline const double* get_masses_ptr() const { return masses; }
330
331
333
337 inline const Conf& get_conf(int idx) const { return confs[idx]; }
338
340
343 inline unsigned int get_no_confs() const { return no_confs; }
344
346
349 inline double getModeLProb() const { return mode_lprob; }
350};
351
352
353
355
359{
360 private:
361 double current_threshold;
362 pod_vector<Conf> configurations;
363 pod_vector<Conf> fringe;
364 pod_vector<double> fringe_unn_lprobs;
365 Allocator<int> allocator;
366 const ConfEqual equalizer;
367 const KeyHasher keyHasher;
368 pod_vector<double> lProbs;
369 pod_vector<double> probs;
370 pod_vector<double> masses;
371 double* guarded_lProbs;
372
373 public:
375
379 LayeredMarginal(Marginal&& m, int tabSize = 1000, int hashSize = 1000); // NOLINT(runtime/explicit) - constructor deliberately left usable as a conversion
380
381 LayeredMarginal(const LayeredMarginal& other) = delete;
382 LayeredMarginal& operator=(const LayeredMarginal& other) = delete;
383
385
389 bool extend(double new_threshold, bool do_sort = true);
390
392 inline double get_lProb(int idx) const { return guarded_lProbs[idx]; } // access to idx == -1 is valid and gives a guardian of +inf
393
395 inline double get_prob(int idx) const { return probs[idx]; }
396
398 inline double get_mass(int idx) const { return masses[idx]; }
399
401 inline const double* get_lProbs_ptr() const { return lProbs.data()+1; }
402
404 inline const Conf& get_conf(int idx) const { return configurations[idx]; }
405
407 inline unsigned int get_no_confs() const { return configurations.size(); }
408
410 double get_min_mass() const;
411
413 double get_max_mass() const;
414
416
419 inline double getModeLProb() const { return mode_lprob; }
420 const pod_vector<double>& conf_lprobs() const { return lProbs; }
421 const pod_vector<double>& conf_masses() const { return masses; }
422
423};
424
425
426
427
428
429
430
431
432template <bool add_guards>
434{
435 private:
436 double current_threshold;
437 pod_vector<double> lProbs;
438 pod_vector<double> probs;
439 pod_vector<double> masses;
440 pod_vector<size_t> original_indexes;
441 double* guarded_lProbs;
442 int extended_to_idx;
443
444 public:
446
448 SingleAtomMarginal(Marginal&& m, int tabSize = 1000, int hashSize = 1000); // NOLINT(runtime/explicit) - constructor deliberately left usable as a conversion
449
450 SingleAtomMarginal(const SingleAtomMarginal& other) = delete;
451 SingleAtomMarginal& operator=(const SingleAtomMarginal& other) = delete;
452
454
458 bool extend(double new_threshold, [[maybe_unused]] bool do_sort = true) {
459 static_assert(add_guards, "SingleAtomMarginal::extend: add_guards must be true");
460 current_threshold = new_threshold;
461 bool extended = false;
462 while(guarded_lProbs[extended_to_idx] >= current_threshold)
463 {
464 extended_to_idx++;
465 extended = true;
466 }
467 return extended || (guarded_lProbs[extended_to_idx] != -std::numeric_limits<double>::infinity());
468 };
469
471 inline double get_lProb(int idx) const { return guarded_lProbs[idx]; } // access to idx == -1 is valid and gives a guardian of +inf
472
474 inline double get_prob(int idx) const { return probs[idx]; }
475
477 inline double get_mass(int idx) const { return masses[idx]; }
478
480 inline const double* get_lProbs_ptr() const { return lProbs.data()+1; }
481
483 inline const Conf& get_conf([[maybe_unused]] int idx) const { throw std::logic_error("SingleAtomMarginal.get_conf: not implemented"); /*return configurations[idx];*/ }
484
486 inline unsigned int get_no_confs() const {
487 if constexpr(add_guards)
488 return extended_to_idx;
489 else
490 return static_cast<unsigned int>(original_indexes.size());}
491
493 inline double get_min_mass() const { throw std::logic_error("SingleAtomMarginal.get_min_mass: not implemented"); };
494
496 double get_max_mass() const { throw std::logic_error("SingleAtomMarginal.get_max_mass: not implemented"); };
497
499
502 inline double getModeLProb() const { return mode_lprob; }
503
504 inline bool probeConfigurationIdx(int idx)
505 {
506 return idx < static_cast<int>(original_indexes.size());
507 }
508 const pod_vector<double>& conf_lprobs() const { return lProbs; }
509 const pod_vector<double>& conf_masses() const { return masses; }
510
511 int get_original_position(int idx) const
512 {
513 #ifdef ISOSPEC_DEBUG
514 if (idx < 0 || static_cast<size_t>(idx) >= original_indexes.size())
515 throw std::out_of_range("Index out of range in SingleAtomMarginal::et_original_position");
516 #endif
517 return original_indexes[idx];
518 }
519};
520
521
522
523
524template <typename T>
525class LoggingMarginal : public Marginal
526{
527 private:
528 std::unique_ptr<T> real_marginal;
529 public:
530
531
532 LoggingMarginal(T&& m) : Marginal(m), real_marginal(std::make_unique(std::move(m))) {}
533 LoggingMarginal(T&& m, int tabSize, int hashSize)
534 : Marginal(m), real_marginal(std::make_unique<T>(std::move(m), tabSize, hashSize)) {}
535
536 LoggingMarginal(const LoggingMarginal& other) = delete;
537 LoggingMarginal& operator=(const LoggingMarginal& other) = delete;
538
539 inline double getModeLProb() const { auto ret = real_marginal->getModeLProb(); std::cout << "LoggingMarginal::getModeLProb: " << ret << std::endl; return ret; }
540 inline double get_lProb(int idx) const { auto ret = real_marginal->get_lProb(idx); std::cout << "LoggingMarginal::get_lProb: " << idx << " " << ret << std::endl; return ret; }
541 inline double get_prob(int idx) const { auto ret = real_marginal->get_prob(idx); std::cout << "LoggingMarginal::get_prob: " << idx << " " << ret << std::endl; return ret; }
542 inline double get_mass(int idx) const { auto ret = real_marginal->get_mass(idx); std::cout << "LoggingMarginal::get_mass: " << idx << " " << ret << std::endl; return ret; }
543 inline const double* get_lProbs_ptr() const { auto ret = real_marginal->get_lProbs_ptr(); std::cout << "LoggingMarginal::get_lProbs_ptr: "; printArray<double>(ret, real_marginal->get_no_confs()); return ret; }
544 inline const Conf& get_conf(int idx) const { auto ret = real_marginal->get_conf(idx); std::cout << "LoggingMarginal::get_conf: " << idx << std::endl; return ret; }
545 inline unsigned int get_no_confs() const { auto ret = real_marginal->get_no_confs(); std::cout << "LoggingMarginal::get_no_confs: " << ret << std::endl; return ret; }
546 inline bool probeConfigurationIdx(int idx) { auto ret = real_marginal->probeConfigurationIdx(idx); std::cout << "LoggingMarginal::probeConfigurationIdx: " << idx << " " << ret << std::endl; return ret; }
547 inline double get_min_mass() const { auto ret = real_marginal->get_min_mass(); std::cout << "LoggingMarginal::get_min_mass: " << ret << std::endl; return ret; }
548 inline double get_max_mass() const { auto ret = real_marginal->get_max_mass(); std::cout << "LoggingMarginal::get_max_mass: " << ret << std::endl; return ret; }
549 inline double getModeMass() const { auto ret = real_marginal->getModeMass(); std::cout << "LoggingMarginal::getModeMass: " << ret << std::endl; return ret; }
550 inline double getLightestConfMass() const { auto ret = real_marginal->getLightestConfMass(); std::cout << "LoggingMarginal::getLightestConfMass: " << ret << std::endl; return ret; }
551 inline double getHeaviestConfMass() const { auto ret = real_marginal->getHeaviestConfMass(); std::cout << "LoggingMarginal::getHeaviestConfMass: " << ret << std::endl; return ret; }
552 inline double getMonoisotopicConfMass() const { auto ret = real_marginal->getMonoisotopicConfMass(); std::cout << "LoggingMarginal::getMonoisotopicConfMass: " << ret << std::endl; return ret; }
553 inline double getAtomAverageMass() const { auto ret = real_marginal->getAtomAverageMass(); std::cout << "LoggingMarginal::getAtomAverageMass: " << ret << std::endl; return ret; }
554 inline double variance() const { auto ret = real_marginal->variance(); std::cout << "LoggingMarginal::variance: " << ret << std::endl; return ret; }
555 inline double getTheoreticalAverageMass() const { auto ret = real_marginal->getTheoreticalAverageMass(); std::cout << "LoggingMarginal::getTheoreticalAverageMass: " << ret << std::endl; return ret; }
556 inline double getLogSizeEstimate(double logEllipsoidRadius) const
557 {
558 auto ret = real_marginal->getLogSizeEstimate(logEllipsoidRadius);
559 std::cout << "LoggingMarginal::getLogSizeEstimate: " << logEllipsoidRadius << " " << ret << std::endl;
560 return ret;
561 }
562 inline void ensureModeConf() { real_marginal->ensureModeConf(); std::cout << "LoggingMarginal::ensureModeConf" << std::endl; }
563 inline const pod_vector<double>& conf_lprobs() const { return real_marginal->conf_lprobs(); }
564 inline const pod_vector<double>& conf_masses() const { return real_marginal->conf_masses(); }
565 inline int get_original_position(int idx) const { return real_marginal->get_original_position(idx); }
566 inline int get_isotopeNo() const { return real_marginal->get_isotopeNo(); }
567 inline const double* get_lProbs() const { return real_marginal->get_lProbs(); }
568 inline const double* get_masses() const { return real_marginal->get_masses(); }
569 inline const double* get_atom_lProbs() const { return real_marginal->get_lProbs(); }
570 inline const double* get_atom_masses() const { return real_marginal->get_masses(); }
571 inline bool extend(double new_threshold, bool do_sort = true)
572 {
573 auto ret = real_marginal->extend(new_threshold, do_sort);
574 std::cout << "LoggingMarginal::extend: " << new_threshold << " " << ret << std::endl;
575 return ret;
576 }
577};
578
579
580} // namespace IsoSpec
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
double get_max_mass() const
Get the maximal mass in current layer.
double get_min_mass() const
Get the minimal mass in current layer.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
LayeredMarginal(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
const Conf & get_conf(int idx) const
get the counts of isotopes that define the subisotopologue, see details in PrecalculatedMarginal::get...
int get_isotopeNo() const
Get the number of isotopes of the investigated element.
Conf computeModeConf() const
The the probability of the mode subisotopologue.
double getSmallestLProb() const
The the log-probability of the lightest subisotopologue.
Marginal(const double *_masses, const double *_probs, int _isotopeNo, int _atomCnt)
Class constructor.
const unsigned int atomCnt
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
double getModeLProb()
Get the log-probability of the mode subisotopologue.
double getModeMass()
The the mass of the mode subisotopologue.
double getLightestConfMass() const
Get the mass of the lightest subisotopologue.
const unsigned int isotopeNo
const double *const atom_masses
double variance() const
Calculate the variance of the theoretical distribution describing the subisotopologue.
ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const
Calculate the log-probability of a given subisotopologue.
const double loggamma_nominator
double getHeaviestConfMass() const
Get the mass of the heaviest subisotopologue.
double getAtomAverageMass() const
The average mass of a single atom.
double getMonoisotopicConfMass() const
Get the mass of the monoisotopic subisotopologue.
double getTheoreticalAverageMass() const
The theoretical average mass of the molecule.
virtual ~Marginal()
Destructor.
size_t getMonoisotopicAtomIndex() const
Get the index of the monoisotopic (most probable) atom.
double getLogSizeEstimate(double logEllipsoidRadius) const
Return estimated logarithm of size of the marginal at a given ellipsoid radius.
const double *const atom_lProbs
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
bool probeConfigurationIdx(int idx)
Check if the table of computed subisotopologues does not have to be extended.
MarginalTrek(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.
const double * get_masses_ptr() const
Get the table of the masses of subisotopologues.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
virtual ~PrecalculatedMarginal()
Destructor.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
bool inRange(unsigned int idx) const
Is there a subisotopologue with a given number?
const Conf & get_conf(int idx) const
Get the counts of isotopes that define the subisotopologue.
PrecalculatedMarginal(Marginal &&m, double lCutOff, bool sort=true, int tabSize=1000, int hashSize=1000)
The move constructor (disowns the Marginal).
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_min_mass() const
Get the minimal mass in current layer.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
SingleAtomMarginal(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
double get_max_mass() const
Get the maximal mass in current layer.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
const Conf & get_conf(int idx) const
get the counts of isotopes that define the subisotopologue, see details in PrecalculatedMarginal::get...
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.