RDKit
Open-source cheminformatics and machine learning.
SubstanceGroup.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 /*! \file SubstanceGroup.h
12 
13  \brief Defines the SubstanceGroup class
14 
15 */
16 #include <RDGeneral/export.h>
17 #ifndef _RD_SGROUP_H
18 #define _RD_SGROUP_H
19 
20 #include <unordered_map>
21 
22 #include <Geometry/point.h>
23 #include <RDGeneral/types.h>
24 #include <RDGeneral/RDProps.h>
25 #include <boost/smart_ptr.hpp>
26 
27 namespace RDKit {
28 class ROMol;
29 class RWMol;
30 class Bond;
31 class Atom;
32 
33 //! used to indicate errors from incorrect sgroup access
35  : public std::runtime_error {
36  public:
37  //! construct with an error message
38  SubstanceGroupException(const char *msg) : std::runtime_error(msg){};
39  //! construct with an error message
40  SubstanceGroupException(const std::string &msg) : std::runtime_error(msg){};
41 };
42 
43 //! The class for representing SubstanceGroups
44 /*!
45  <b>Notes:</b>
46  - These are inspired by the SGroups in the MDL formats
47  - Implementation is based on 2010 MDL SD specification:
48  http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
49  - See SGroups.md for further, more comprehensive notes.
50 
51 */
52 
54  public:
//! Bond type (see V3000 spec)
enum class BondType {
  XBOND = 0,  //!< External/Crossing bond
  CBOND = 1,  //!< Internal/Contained bond
};
60 
61  typedef std::array<RDGeom::Point3D, 3> Bracket;
62 
//! Data structure for SAP lines (see V3000 spec).
//! lvIdx may be left unset; this is signaled by the value -1.
struct AttachPoint {
  unsigned int aIdx;
  int lvIdx;
  std::string id;

  //! Two attach points compare equal only when aIdx, lvIdx and id all match.
  bool operator==(const AttachPoint &other) const {
    if (aIdx != other.aIdx) {
      return false;
    }
    if (lvIdx != other.lvIdx) {
      return false;
    }
    return id == other.id;
  }
};
73 
74  //! See specification for V3000 CSTATE
75  //! vector may or may not be considered, depending on TYPE
76  struct CState {
// bond index stored for this CSTATE entry
77  unsigned int bondIdx;
// NOTE(review): the embedded listing numbering jumps from 77 to 79 here, so
// one source line is absent from this view. It presumably declared the
// coordinate vector that addCState() accepts -- confirm against the header.
//! equality is decided by bondIdx alone
79  bool operator==(const CState &other) const {
80  // note that we ignore coordinates for this
81  return bondIdx == other.bondIdx;
82  }
83  };
84 
85 //! No default constructor
86 #ifndef SWIG
87  // Unfortunately, SWIG generated wrapper code uses temporary variables that
88  // require a default ctor not be deleted.
89  SubstanceGroup() = delete;
90 #endif // !SWIG
91 
92  //! Main Constructor. Ownership is only set on this side of the relationship:
93  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
94  //! on the other side.
95  SubstanceGroup(ROMol *owning_mol, const std::string &type);
96 
97  SubstanceGroup(const SubstanceGroup &other) = default;
98  SubstanceGroup(SubstanceGroup &&other) = default;
99 
100  SubstanceGroup &operator=(const SubstanceGroup &other) = default;
102 
103  //! Destructor
105 
106  //! returns whether or not this belongs to a molecule
107  bool hasOwningMol() const { return dp_mol != nullptr; };
108 
109  //! Get the molecule that owns this instance
//! The PRECONDITION macro is applied to dp_mol with the message "no owner"
//! before the pointer is dereferenced.
//! NOTE(review): what PRECONDITION does on failure is defined in
//! Invariant.h, not in this header -- presumably it raises; confirm.
//! \returns a reference to the molecule dp_mol points to
110  ROMol &getOwningMol() const {
111  PRECONDITION(dp_mol, "no owner");
112  return *dp_mol;
113  }
114 
115  //! get the index of this sgroup in dp_mol's sgroups vector
116  //! (do not mistake this for the ID!)
117  unsigned int getIndexInMol() const;
118 
/* Atom and Bond methods */
//! The *WithIdx overloads take an atom/bond index, the *WithBookmark
//! overloads take a bookmark value. All are implemented outside this header.
//! NOTE(review): bookmarks are presumably resolved through the owning
//! molecule -- confirm against the implementation.
void addAtomWithIdx(unsigned int idx);
void addParentAtomWithIdx(unsigned int idx);
void addBondWithIdx(unsigned int idx);
void addAtomWithBookmark(int mark);
// This declaration was missing from the listing (the embedded numbering
// skips line 124) although the member index still lists it.
void addParentAtomWithBookmark(int mark);
void addBondWithBookmark(int mark);
126 
127  void addBracket(const Bracket &bracket);
128  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
129  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
130 
131  BondType getBondType(unsigned int bondIdx) const;
132 
//! \returns a const reference to the d_atoms index vector
133  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
//! \returns a const reference to the d_patoms index vector
134  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
//! \returns a const reference to the d_bonds index vector
135  const std::vector<unsigned int> &getBonds() const { return d_bonds; }
136 
//! \returns a const reference to the stored brackets (d_brackets)
137  const std::vector<Bracket> &getBrackets() const { return d_brackets; }
//! \returns a const reference to the stored CState entries (d_cstates)
138  const std::vector<CState> &getCStates() const { return d_cstates; }
//! \returns a const reference to the stored attach points (d_saps)
139  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
140 
141  void clearBrackets() { d_brackets.clear(); };
142  void clearCStates() { d_cstates.clear(); };
143  void clearAttachPoints() { d_saps.clear(); };
144 
145  //! adjusts our atom IDs to reflect that an atom has been removed from the
146  //! parent molecule
147  //! decrements all atom IDs that are higher than \c atomIdx
148  //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
149  //! this substance group
150  //! \returns whether or not anything was changed
151  bool adjustToRemovedAtom(unsigned int atomIdx);
152 
153  //! \returns whether or not the specified atom is part of the
154  //! definition of this substance group
155  bool includesAtom(unsigned int atomIdx) const;
156 
157  //! adjusts our bond IDs to reflect that a bond has been removed from the
158  //! parent molecule
159  //! decrements all bond IDs that are higher than \c bondIdx
160  //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
161  //! this substance group
162  //! \returns whether or not anything was changed
163  bool adjustToRemovedBond(unsigned int bondIdx);
164 
165  //! \returns whether or not the specified bond is part of the
166  //! definition of this substance group
167  bool includesBond(unsigned int bondIdx) const;
168 
169  //! Set owning molecule
170  //! This only updates atoms and bonds; parent sgroup has to be updated
171  //! independently, since parent might not exist at the time this is
172  //! called.
173  void setOwningMol(ROMol *mol);
174 
175  bool operator==(const SubstanceGroup &other) const {
176  // we ignore brackets and cstates, which involve coordinates
177  return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
178  d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
179  d_saps == other.d_saps;
180  }
181 
182  private:
183  ROMol *dp_mol = nullptr; // owning molecule
184 
185  std::vector<unsigned int> d_atoms;
186  std::vector<unsigned int> d_patoms;
187  std::vector<unsigned int> d_bonds;
188 
189  std::vector<Bracket> d_brackets;
190  std::vector<CState> d_cstates;
191  std::vector<AttachPoint> d_saps;
192 };
193 
194 namespace SubstanceGroupChecks {
195 
//! Known SGroup type codes, grouped by category.
const std::vector<std::string> sGroupTypes{
    // polymer sgroups:
    "SRU",
    "MON",
    "COP",
    "CRO",
    "GRA",
    "MOD",
    "MER",
    "ANY",
    // formulations/mixtures:
    "COM",
    "MIX",
    "FOR",
    // other
    "SUP",
    "MUL",
    "DAT",
    "GEN",
};

//! Known SGroup subtype codes.
const std::vector<std::string> sGroupSubtypes{"ALT", "RAN", "BLO"};
//! Known SGroup connect type codes.
const std::vector<std::string> sGroupConnectTypes{"HH", "HT", "EU"};
206 
207 RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
208 
209 RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
210 
211 RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
212 
214  unsigned int id);
215 
216 } // namespace SubstanceGroupChecks
217 
218 //! \name SubstanceGroups and molecules
219 //@{
220 
221 RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
222  ROMol &mol);
223 RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
224  const ROMol &mol);
225 
226 //! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
227 //! references to the SubstanceGroup exist.
228 /*!
229  \param sgroup - SubstanceGroup to be added to the molecule.
230 */
232  SubstanceGroup sgroup);
233 
234 //! Removes SubstanceGroups which reference a particular atom index
235 /*!
236  \param mol - molecule to be edited.
237  \param idx - atom index
238 */
240  RWMol &mol, unsigned int idx);
241 //! Removes SubstanceGroups which reference a particular bond index
242 /*!
243  \param mol - molecule to be edited.
244  \param idx - bond index
245 */
247  RWMol &mol, unsigned int idx);
248 //@}
249 
250 } // namespace RDKit
251 
252 //! allows SubstanceGroup objects to be dumped to streams
253 RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
254  const RDKit::SubstanceGroup &sg);
255 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
used to indicate errors from incorrect sgroup access
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< Bracket > & getBrackets() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
const std::vector< unsigned int > & getAtoms() const
void addParentAtomWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
SubstanceGroup(SubstanceGroup &&other)=default
~SubstanceGroup()
Destructor.
const std::vector< unsigned int > & getBonds() const
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
const std::vector< CState > & getCStates() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
const std::vector< AttachPoint > & getAttachPoints() const
ROMol & getOwningMol() const
Get the molecule that owns this instance.
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void addBondWithBookmark(int mark)
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
SubstanceGroup & operator=(SubstanceGroup &&other)=default
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
bool hasOwningMol() const
returns whether or not this belongs to a molecule
bool includesBond(unsigned int bondIdx) const
BondType getBondType(unsigned int bondIdx) const
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:346
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
Definition: Abbreviations.h:17
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const