RDKit
Open-source cheminformatics and machine learning.
Seed.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #pragma once
12 #include <map>
13 #include "../RDKitBase.h"
14 #include "DebugTrace.h" // algorithm optimisation definitions
15 #include "Graph.h"
16 #include "DuplicatedSeedCache.h"
17 #include "SubstructMatchCustom.h"
18 
19 namespace RDKit {
20 namespace FMCS {
21 class MaximumCommonSubgraph;
22 struct TargetMatch;
23 
24 struct RDKIT_FMCS_EXPORT MolFragment { // Reference to a fragment of source molecule
25  std::vector<const Atom*> Atoms;
26  std::vector<const Bond*> Bonds;
27  std::vector<unsigned> AtomsIdx;
28  std::vector<unsigned> BondsIdx; // need for results and size() only !
29  std::map<unsigned, unsigned> SeedAtomIdxMap; // Full Query Molecule to Seed
30  // indeces backward conversion
31  // map
32 };
33 
35  unsigned SourceAtomIdx; // index in the seed. Atom is already in the seed
36  unsigned BondIdx; // index in qmol of new bond scheduled to be added into
37  // seed. This is outgoing bond from SourceAtomIdx
38  unsigned NewAtomIdx; // index in qmol of new atom scheduled to be added into
39  // seed. Another end of new bond
40  const Atom* NewAtom; // pointer to qmol's new atom scheduled to be added into
41  // seed. Another end of new bond
42  unsigned EndAtomIdx; // index in the seed. RING. "New" Atom on the another
43  // end of new bond is already exists in the seed.
44 
46  : SourceAtomIdx(-1),
47  BondIdx(-1),
48  NewAtomIdx(-1),
49  NewAtom(0),
50  EndAtomIdx(-1) {}
51 
52  NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom,
53  unsigned to_atom, const Atom* a)
54  : SourceAtomIdx(from_atom),
55  BondIdx(bond_idx),
56  NewAtomIdx(new_atom),
57  NewAtom(a),
58  EndAtomIdx(to_atom) {}
59 };
60 
62  private:
63  mutable std::vector<NewBond> NewBonds; // for multistage growing. all
64  // directly connected outgoing bonds
65  public:
66  bool CopyComplete; // this seed has been completely copied into list.
67  // postponed non0locked copy for MULTI_THREAD
68  mutable unsigned GrowingStage; // 0 new seed; -1 finished; n>0 in progress,
69  // exact stage of growing for SDF
70  MolFragment MoleculeFragment; // Reference to a fragment of source molecule
71  Graph Topology; // seed topology with references to source molecule
72 
73  std::vector<bool> ExcludedBonds;
74  unsigned LastAddedAtomsBeginIdx; // in this subgraph for improving
75  // performance of future growing
76  unsigned LastAddedBondsBeginIdx; // in this subgraph for DEBUG ONLY
77  unsigned RemainingBonds;
78  unsigned RemainingAtoms;
79 #ifdef DUP_SUBSTRUCT_CACHE
81 #endif
82  std::vector<TargetMatch> MatchResult; // for each target
83  public:
84  Seed()
85  : CopyComplete(false),
86  GrowingStage(0),
87  LastAddedAtomsBeginIdx(0),
88  LastAddedBondsBeginIdx(0),
89  RemainingBonds(-1),
90  RemainingAtoms(-1) {}
91 
92  void setMoleculeFragment(const Seed& src) {
93  MoleculeFragment = src.MoleculeFragment;
94  }
95  Seed& operator=(const Seed& src) {
96  NewBonds = src.NewBonds;
97  GrowingStage = src.GrowingStage;
98  MoleculeFragment = src.MoleculeFragment;
99  Topology = src.Topology;
100  ExcludedBonds = src.ExcludedBonds;
101  LastAddedAtomsBeginIdx = src.LastAddedAtomsBeginIdx;
102  LastAddedBondsBeginIdx = src.LastAddedBondsBeginIdx;
103  RemainingBonds = src.RemainingBonds;
104  RemainingAtoms = src.RemainingAtoms;
105 #ifdef DUP_SUBSTRUCT_CACHE
106  DupCacheKey = src.DupCacheKey;
107 #endif
108  MatchResult = src.MatchResult;
109  CopyComplete = true; // LAST
110  return *this;
111  }
112  void createFromParent(const Seed* parent) {
113  MoleculeFragment = parent->MoleculeFragment;
114  Topology = parent->Topology;
115  ExcludedBonds = parent->ExcludedBonds;
116  RemainingBonds = parent->RemainingBonds;
117  RemainingAtoms = parent->RemainingAtoms;
118 #ifdef DUP_SUBSTRUCT_CACHE
119  DupCacheKey = parent->DupCacheKey;
120 #endif
121  LastAddedAtomsBeginIdx = getNumAtoms(); // previous size
122  LastAddedBondsBeginIdx = getNumBonds(); // previous size
123  GrowingStage = 0;
124  }
125 
126  unsigned getNumAtoms() const { return MoleculeFragment.AtomsIdx.size(); }
127  unsigned getNumBonds() const { return MoleculeFragment.BondsIdx.size(); }
128 
129  void grow(MaximumCommonSubgraph& mcs) const;
130  bool canGrowBiggerThan(unsigned maxBonds,
131  unsigned maxAtoms) const { // prune()
132  return RemainingBonds + getNumBonds() > maxBonds ||
133  (RemainingBonds + getNumBonds() == maxBonds &&
134  RemainingAtoms + getNumAtoms() > maxAtoms);
135  }
136  void computeRemainingSize(const ROMol& qmol);
137 
138  unsigned addAtom(const Atom* atom);
139  unsigned addBond(const Bond* bond);
140  void fillNewBonds(const ROMol& qmol);
141 };
142 }
143 }
unsigned BondIdx
Definition: Seed.h:36
std::vector< TargetMatch > MatchResult
Definition: Seed.h:82
std::vector< unsigned > BondsIdx
Definition: Seed.h:28
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:29
void setMoleculeFragment(const Seed &src)
Definition: Seed.h:92
unsigned SourceAtomIdx
Definition: Seed.h:35
bool canGrowBiggerThan(unsigned maxBonds, unsigned maxAtoms) const
Definition: Seed.h:130
unsigned RemainingAtoms
Definition: Seed.h:78
#define RDKIT_FMCS_EXPORT
Definition: export.h:190
unsigned RemainingBonds
Definition: Seed.h:77
unsigned NewAtomIdx
Definition: Seed.h:38
std::vector< bool > ExcludedBonds
Definition: Seed.h:73
unsigned LastAddedAtomsBeginIdx
Definition: Seed.h:74
unsigned GrowingStage
Definition: Seed.h:68
unsigned getNumAtoms() const
Definition: Seed.h:126
Graph Topology
Definition: Seed.h:71
Std stuff.
Definition: Atom.h:30
unsigned EndAtomIdx
Definition: Seed.h:42
class for representing a bond
Definition: Bond.h:47
bool CopyComplete
Definition: Seed.h:66
MolFragment MoleculeFragment
Definition: Seed.h:70
DuplicatedSeedCache::TKey DupCacheKey
Definition: Seed.h:80
std::vector< const Bond * > Bonds
Definition: Seed.h:26
NewBond(unsigned from_atom, unsigned bond_idx, unsigned new_atom, unsigned to_atom, const Atom *a)
Definition: Seed.h:52
void createFromParent(const Seed *parent)
Definition: Seed.h:112
Seed & operator=(const Seed &src)
Definition: Seed.h:95
unsigned LastAddedBondsBeginIdx
Definition: Seed.h:76
const Atom * NewAtom
Definition: Seed.h:40
std::vector< const Atom * > Atoms
Definition: Seed.h:25
The class for representing atoms.
Definition: Atom.h:69
std::vector< unsigned > AtomsIdx
Definition: Seed.h:27
unsigned getNumBonds() const
Definition: Seed.h:127