24 #include <unordered_map>
37 #include "dirtyAllocator.h"
38 #include "operators.h"
40 #include "marginalTrek++.h"
41 #include "isoSpec++.h"
43 #include "element_tables.h"
53 const int* _isotopeNumbers,
54 const int* _atomCounts,
55 const double*
const * _isotopeMasses,
56 const double*
const * _isotopeProbabilities
59 dimNumber(_dimNumber),
60 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
61 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
62 confSize(_dimNumber * sizeof(int)),
68 setupMarginals(_isotopeMasses, _isotopeProbabilities);
79 disowned(other.disowned),
80 dimNumber(other.dimNumber),
81 isotopeNumbers(other.isotopeNumbers),
82 atomCounts(other.atomCounts),
83 confSize(other.confSize),
85 marginals(other.marginals),
86 modeLProb(other.modeLProb)
88 other.disowned =
true;
93 disowned(fullcopy ? throw std::logic_error(
"Not implemented") : true),
94 dimNumber(other.dimNumber),
95 isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
96 atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
97 confSize(other.confSize),
99 marginals(fullcopy ? throw std::logic_error(
"Not implemented") : other.marginals),
100 modeLProb(other.modeLProb)
104 inline void Iso::setupMarginals(
const double*
const * _isotopeMasses,
const double*
const * _isotopeProbabilities)
117 _isotopeProbabilities[ii],
157 mass +=
marginals[ii]->getLightestConfMass();
165 mass +=
marginals[ii]->getHeaviestConfMass();
177 std::vector<const double*> isotope_masses;
178 std::vector<const double*> isotope_probabilities;
182 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
185 unsigned int parse_formula(
const char* formula, std::vector<const double*>& isotope_masses, std::vector<const double*>& isotope_probabilities,
int** isotopeNumbers,
int** atomCounts,
unsigned int* confSize)
188 size_t slen = strlen(formula);
192 std::vector<std::pair<const char*, size_t> > elements;
193 std::vector<int> numbers;
196 throw invalid_argument(
"Invalid formula: can't be empty");
198 if(!isdigit(formula[slen-1]))
199 throw invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
201 for(
size_t ii=0; ii<slen; ii++)
202 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
203 throw invalid_argument(
"Ivalid formula: contains invalid (non-digit, non-alpha) character");
207 size_t digit_end = 0;
209 while(position < slen)
212 while(isalpha(formula[elem_end]))
214 digit_end = elem_end;
215 while(isdigit(formula[digit_end]))
217 elements.emplace_back(&formula[position], elem_end-position);
218 numbers.push_back(atoi(&formula[elem_end]));
219 position = digit_end;
222 std::vector<int> element_indexes;
224 for (
unsigned int i=0; i<elements.size(); i++)
227 for(
int j=0; j<ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
229 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
236 throw invalid_argument(
"Invalid formula");
237 element_indexes.push_back(idx);
240 vector<int> _isotope_numbers;
242 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
246 int atomicNo = elem_table_atomicNo[at_idx];
247 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_atomicNo[at_idx] == atomicNo)
252 _isotope_numbers.push_back(num);
255 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
257 isotope_masses.push_back(&elem_table_mass[*it]);
258 isotope_probabilities.push_back(&elem_table_probability[*it]);
261 const unsigned int dimNumber = elements.size();
263 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
264 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
265 *confSize = dimNumber *
sizeof(int);
280 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
281 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
282 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
313 Lcutoff(_threshold <= 0.0 ? std::numeric_limits<double>::lowest() : (_absolute ? log(_threshold) : log(_threshold) + modeLProb))
330 if(!marginalResultsUnsorted[ii]->inRange(0))
334 if(reorder_marginals)
337 int* tmpMarginalOrder =
new int[
dimNumber];
340 tmpMarginalOrder[ii] = ii;
342 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, comparator);
346 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
350 marginalOrder[tmpMarginalOrder[ii]] = ii;
352 delete[] tmpMarginalOrder;
357 marginalResults = marginalResultsUnsorted;
358 marginalOrder =
nullptr;
367 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->
getModeLProb();
369 lProbs_ptr = lProbs_ptr_start;
372 partialLProbs_second++;
383 lcfmsv = std::numeric_limits<double>::infinity();
395 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
398 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
421 memset(counter, 0,
sizeof(
int)*
dimNumber);
425 lProbs_ptr = lProbs_ptr_start - 1;
433 IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
444 logProbs =
new const vector<double>*[
dimNumber];
445 masses =
new const vector<double>*[
dimNumber];
446 marginalConfs =
new const vector<int*>*[
dimNumber];
450 masses[i] = &marginalResults[i]->conf_masses();
451 logProbs[i] = &marginalResults[i]->conf_lprobs();
452 marginalConfs[i] = &marginalResults[i]->confs();
455 topConf = allocator.newConf();
457 reinterpret_cast<char*>(topConf) +
sizeof(
double),
462 *(reinterpret_cast<double*>(topConf)) =
476 dealloc_table<MarginalTrek*>(marginalResults,
dimNumber);
479 delete[] marginalConfs;
495 int* topConfIsoCounts = getConf(topConf);
497 currentLProb = *(reinterpret_cast<double*>(topConf));
498 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
499 currentProb = exp(currentLProb);
504 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
508 topConfIsoCounts[j]++;
509 *(reinterpret_cast<double*>(topConf)) = combinedSum(topConfIsoCounts, logProbs,
dimNumber);
511 topConfIsoCounts[j]--;
516 void* acceptedCandidate = allocator.newConf();
517 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
518 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts,
confSize);
520 acceptedCandidateIsoCounts[j]++;
522 *(reinterpret_cast<double*>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs,
dimNumber);
524 pq.push(acceptedCandidate);
527 if(topConfIsoCounts[j] > 0)
531 topConfIsoCounts[ccount]++;
545 #if !ISOSPEC_BUILDING_R
547 void printConfigurations(
548 const std::tuple<double*,double*,int*,int>& results,
554 for(
int i=0; i<std::get<3>(results); i++){
556 std::cout <<
"Mass = " << std::get<0>(results)[i] <<
557 "\tand log-prob = " << std::get<1>(results)[i] <<
558 "\tand prob = " << exp(std::get<1>(results)[i]) <<
559 "\tand configuration =\t";
562 for(
int j=0; j<dimNumber; j++){
563 for(
int k=0; k<isotopeNumbers[j]; k++ )
565 std::cout << std::get<2>(results)[m] <<
" ";
572 std::cout << std::endl;
580 IsoLayeredGenerator::IsoLayeredGenerator( Iso&& iso,
581 double _targetCoverage,
582 double _percentageToExpand,
586 ) : IsoGenerator(std::move(iso)),
587 allocator(dimNumber, _tabSize),
588 candidate(new int[dimNumber]),
589 targetCoverage(_targetCoverage >= 1.0 ? 10000.0 : _targetCoverage),
592 percentageToExpand(_percentageToExpand),
595 generator_position(-1)
597 marginalResults =
new MarginalTrek*[
dimNumber];
600 marginalResults[i] =
new MarginalTrek(std::move(*(
marginals[i])), _tabSize, _hashSize);
602 logProbs =
new const vector<double>*[
dimNumber];
603 masses =
new const vector<double>*[
dimNumber];
604 marginalConfs =
new const vector<int*>*[
dimNumber];
608 masses[i] = &marginalResults[i]->conf_masses();
609 logProbs[i] = &marginalResults[i]->conf_lprobs();
610 marginalConfs[i] = &marginalResults[i]->confs();
613 void* topConf = allocator.newConf();
614 memset(reinterpret_cast<char*>(topConf) +
sizeof(
double), 0,
sizeof(
int)*
dimNumber);
615 *(reinterpret_cast<double*>(topConf)) = combinedSum(getConf(topConf), logProbs,
dimNumber);
617 current =
new std::vector<void*>();
618 next =
new std::vector<void*>();
620 current->push_back(topConf);
622 lprobThr = (*reinterpret_cast<double*>(topConf));
624 if(targetCoverage > 0.0)
625 while(advanceToNextLayer()) {};
629 IsoLayeredGenerator::~IsoLayeredGenerator()
631 if(current !=
nullptr)
637 delete[] marginalConfs;
639 dealloc_table(marginalResults,
dimNumber);
642 bool IsoLayeredGenerator::advanceToNextLayer()
645 double maxFringeLprob = -std::numeric_limits<double>::infinity();
647 if(current ==
nullptr)
649 int accepted_in_this_layer = 0;
650 Summator prob_in_this_layer(totalProb);
654 while(current->size() > 0)
656 topConf = current->back();
659 double top_lprob = getLProb(topConf);
661 if(top_lprob >= lprobThr)
663 newaccepted.push_back(topConf);
664 accepted_in_this_layer++;
665 prob_in_this_layer.add(exp(top_lprob));
669 next->push_back(topConf);
673 int* topConfIsoCounts = getConf(topConf);
679 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
681 memcpy(candidate, topConfIsoCounts,
confSize);
684 void* acceptedCandidate = allocator.newConf();
685 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
686 memcpy( acceptedCandidateIsoCounts, candidate,
confSize);
688 double newConfProb = combinedSum(
696 *(reinterpret_cast<double*>(acceptedCandidate)) = newConfProb;
698 if(newConfProb >= lprobThr)
699 current->push_back(acceptedCandidate);
702 next->push_back(acceptedCandidate);
703 if(newConfProb > maxFringeLprob)
704 maxFringeLprob = top_lprob;
707 if(topConfIsoCounts[j] > 0)
712 if(next ==
nullptr || next->size() < 1)
716 if(prob_in_this_layer.get() < targetCoverage)
718 std::vector<void*>* nnew = current;
722 int howmany = floor(current->size()*percentageToExpand);
723 lprobThr = getLProb(
quickselect(current->data(), howmany, 0, current->size()));
724 totalProb = prob_in_this_layer;
733 int end = accepted_in_this_layer - 1;
738 void** lastLayer = &(newaccepted.data()[newaccepted.size()-accepted_in_this_layer]);
740 Summator qsprob(totalProb);
741 while(totalProb.get() < targetCoverage)
748 int len = end - start;
749 #if ISOSPEC_BUILDING_R
750 int pivot = len/2 + start;
752 int pivot = rand() % len + start;
754 void* pval = lastLayer[pivot];
755 double pprob = getLProb(pval);
756 mswap(lastLayer[pivot], lastLayer[end-1]);
757 int loweridx = start;
758 for(
int i=start; i<end-1; i++)
760 if(getLProb(lastLayer[i]) > pprob)
762 mswap(lastLayer[i], lastLayer[loweridx]);
766 mswap(lastLayer[end-1], lastLayer[loweridx]);
770 Summator leftProb(qsprob);
771 for(
int i=start; i<=loweridx; i++)
773 leftProb.add(exp(getLProb(lastLayer[i])));
775 if(leftProb.get() < targetCoverage)
783 int accend = newaccepted.size()-accepted_in_this_layer+start+1;
786 newaccepted.resize(accend);
791 totalProb = prob_in_this_layer;
802 generator_position++;
803 if(generator_position < newaccepted.size())
805 partialLProbs[0] = getLProb(newaccepted[generator_position]);