RDKit
Open-source cheminformatics and machine learning.
Seed.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <map>
12 #include "../RDKitBase.h"
13 #include "DebugTrace.h" // algorithm optimisation definitions
14 #include "Graph.h"
15 #include "DuplicatedSeedCache.h"
16 #include "SubstructMatchCustom.h"
17 
18 namespace RDKit {
19  namespace FMCS {
20  class MaximumCommonSubgraph;
21  struct TargetMatch;
22 
23  struct MolFragment { // Reference to a fragment of source molecule
24  std::vector<const Atom*> Atoms;
25  std::vector<const Bond*> Bonds;
26  std::vector<unsigned> AtomsIdx;
27  std::vector<unsigned> BondsIdx; // need for results and size() only !
28  std::map<unsigned,unsigned> SeedAtomIdxMap; // Full Query Molecule to Seed indeces backward conversion map
29  };
30 
31  struct NewBond {
32  unsigned SourceAtomIdx; // index in the seed. Atom is already in the seed
33  unsigned BondIdx; // index in qmol of new bond scheduled to be added into seed. This is outgoing bond from SourceAtomIdx
34  unsigned NewAtomIdx; // index in qmol of new atom scheduled to be added into seed. Another end of new bond
35  const Atom* NewAtom; // pointer to qmol's new atom scheduled to be added into seed. Another end of new bond
36  unsigned EndAtomIdx; // index in the seed. RING. "New" Atom on the another end of new bond is already exists in the seed.
37 
38  NewBond() : SourceAtomIdx(-1), BondIdx(-1), NewAtomIdx(-1), NewAtom(0), EndAtomIdx(-1) {}
39 
40  NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom, unsigned to_atom, const Atom* a)
41  : SourceAtomIdx(from_atom), BondIdx(bond_idx), NewAtomIdx(new_atom), NewAtom(a), EndAtomIdx(to_atom) {}
42  };
43 
44  class Seed {
45  private:
46  mutable std::vector<NewBond> NewBonds; // for multistage growing. all directly connected outgoing bonds
47  public:
48  bool CopyComplete; // this seed has been completely copied into list. postponed non0locked copy for MULTI_THREAD
49  mutable unsigned GrowingStage; // 0 new seed; -1 finished; n>0 in progress, exact stage of growing for SDF
50  MolFragment MoleculeFragment; // Reference to a fragment of source molecule
51  Graph Topology; // seed topology with references to source molecule
52 
53  std::vector<bool> ExcludedBonds;
54  unsigned LastAddedAtomsBeginIdx; // in this subgraph for improving performance of future growing
55  unsigned LastAddedBondsBeginIdx; // in this subgraph for DEBUG ONLY
56  unsigned RemainingBonds;
57  unsigned RemainingAtoms;
58 #ifdef DUP_SUBSTRUCT_CACHE
60 #endif
61  std::vector<TargetMatch> MatchResult; // for each target
62  public:
63  Seed() : CopyComplete(false)
64  , GrowingStage(0), LastAddedAtomsBeginIdx(0), LastAddedBondsBeginIdx(0)
65  , RemainingBonds(-1), RemainingAtoms(-1)
66  {}
67 
68  void setMoleculeFragment(const Seed& src) {
69  MoleculeFragment = src.MoleculeFragment;
70  }
71  Seed& operator = (const Seed& src) {
72  NewBonds = src.NewBonds;
73  GrowingStage = src.GrowingStage;
74  MoleculeFragment = src.MoleculeFragment;
75  Topology = src.Topology;
76  ExcludedBonds = src.ExcludedBonds;
77  LastAddedAtomsBeginIdx = src.LastAddedAtomsBeginIdx;
78  LastAddedBondsBeginIdx = src.LastAddedBondsBeginIdx;
79  RemainingBonds = src.RemainingBonds;
80  RemainingAtoms = src.RemainingAtoms;
81 #ifdef DUP_SUBSTRUCT_CACHE
82  DupCacheKey = src.DupCacheKey;
83 #endif
84  MatchResult = src.MatchResult;
85  CopyComplete = true; // LAST
86  return *this;
87  }
88  void createFromParent(const Seed* parent) {
89  MoleculeFragment = parent->MoleculeFragment;
90  Topology = parent->Topology;
91  ExcludedBonds = parent->ExcludedBonds;
92  RemainingBonds = parent->RemainingBonds;
93  RemainingAtoms = parent->RemainingAtoms;
94 #ifdef DUP_SUBSTRUCT_CACHE
95  DupCacheKey = parent->DupCacheKey;
96 #endif
97  LastAddedAtomsBeginIdx = getNumAtoms(); // previous size
98  LastAddedBondsBeginIdx = getNumBonds(); // previous size
99  GrowingStage = 0;
100  }
101 
102  unsigned getNumAtoms()const {
103  return MoleculeFragment.AtomsIdx.size();
104  }
105  unsigned getNumBonds()const {
106  return MoleculeFragment.BondsIdx.size();
107  }
108 
109  void grow(MaximumCommonSubgraph& mcs)const;
110  bool canGrowBiggerThan(unsigned maxBonds, unsigned maxAtoms)const { // prune()
111  return RemainingBonds + getNumBonds() > maxBonds
112  ||(RemainingBonds + getNumBonds() == maxBonds && RemainingAtoms + getNumAtoms() > maxAtoms);
113 
114  }
115  void computeRemainingSize(const ROMol& qmol);
116 
117  unsigned addAtom (const Atom* atom);
118  unsigned addBond (const Bond* bond);
119  void fillNewBonds(const ROMol& qmol);
120  };
121 
122  }
123 }
unsigned BondIdx
Definition: Seed.h:33
std::vector< TargetMatch > MatchResult
Definition: Seed.h:61
std::vector< unsigned > BondsIdx
Definition: Seed.h:27
void setMoleculeFragment(const Seed &src)
Definition: Seed.h:68
unsigned SourceAtomIdx
Definition: Seed.h:32
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:28
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
unsigned RemainingAtoms
Definition: Seed.h:57
unsigned RemainingBonds
Definition: Seed.h:56
unsigned NewAtomIdx
Definition: Seed.h:34
std::vector< bool > ExcludedBonds
Definition: Seed.h:53
unsigned getNumBonds() const
Definition: Seed.h:105
unsigned LastAddedAtomsBeginIdx
Definition: Seed.h:54
unsigned GrowingStage
Definition: Seed.h:49
Graph Topology
Definition: Seed.h:51
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
unsigned EndAtomIdx
Definition: Seed.h:36
class for representing a bond
Definition: Bond.h:46
bool CopyComplete
Definition: Seed.h:48
bool canGrowBiggerThan(unsigned maxBonds, unsigned maxAtoms) const
Definition: Seed.h:110
MolFragment MoleculeFragment
Definition: Seed.h:50
DuplicatedSeedCache::TKey DupCacheKey
Definition: Seed.h:59
std::vector< const Bond * > Bonds
Definition: Seed.h:25
NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom, unsigned to_atom, const Atom *a)
Definition: Seed.h:40
void createFromParent(const Seed *parent)
Definition: Seed.h:88
unsigned LastAddedBondsBeginIdx
Definition: Seed.h:55
unsigned getNumAtoms() const
Definition: Seed.h:102
const Atom * NewAtom
Definition: Seed.h:35
std::vector< const Atom * > Atoms
Definition: Seed.h:24
The class for representing atoms.
Definition: Atom.h:67
std::vector< unsigned > AtomsIdx
Definition: Seed.h:26