RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
AlignMolecules.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_ALIGNMOLECULES_H_
12#define _RD_ALIGNMOLECULES_H_
13
15#include <Numerics/Vector.h>
16#include <vector>
17
18namespace RDKit {
19typedef std::vector<std::pair<int, int>> MatchVectType;
20
21class Conformer;
22class ROMol;
23namespace MolAlign {
//! Exception class of the MolAlign namespace; stores a copy of an error
//! message and exposes it through std::exception::what()
class RDKIT_MOLALIGN_EXPORT MolAlignException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg  NUL-terminated message text; a null pointer is treated as an
                empty message (building a std::string from a null pointer is
                undefined behaviour)
  */
  MolAlignException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg  message text; taken by const reference so the only copy made
                is the one stored in the exception
  */
  MolAlignException(const std::string &msg) : _msg(msg) {}
  //! get the error message
  /*!
    \return pointer to the stored message; it is owned by this object
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolAlignException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the message returned by what()
};
37
38//! Alignment functions
39
40//! Compute the transformation required to align a molecule
41/*!
42 The 3D transformation required to align the specified conformation in the
43 probe molecule to a specified conformation in the reference molecule is
44 computed so that the root mean squared distance between a specified set of
45 atoms is minimized
46
47 \param prbMol molecule that is to be aligned
48 \param refMol molecule used as the reference for the alignment
49 \param trans storage for the computed transform
50 \param prbCid ID of the conformation in the probe to be used
51 for the alignment (defaults to first conformation)
52 \param refCid ID of the conformation in the ref molecule to which
53 the alignment is computed (defaults to first conformation)
54 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
55 used to compute the alignments. If this mapping is
56 not specified an attempt is made to generate one by
57 substructure matching
58 \param weights Optionally specify weights for each of the atom pairs
59 \param reflect if true reflect the conformation of the probe molecule
60 \param maxIters maximum number of iterations used in minimizing the RMSD
61
62 <b>Returns</b>
63 RMSD value
64*/
65RDKIT_MOLALIGN_EXPORT double getAlignmentTransform(
66 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans,
67 int prbCid = -1, int refCid = -1, const MatchVectType *atomMap = nullptr,
68 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
69 unsigned int maxIters = 50);
70
71//! Optimally (minimum RMSD) align a molecule to another molecule
72/*!
73 The 3D transformation required to align the specified conformation in the
74 probe molecule to a specified conformation in the reference molecule is
75 computed so that the root mean squared distance between a specified set of
76 atoms is minimized. This transform is then applied to the specified
77 conformation in the probe molecule
78
79 \param prbMol molecule that is to be aligned
80 \param refMol molecule used as the reference for the alignment
81 \param prbCid ID of the conformation in the probe to be used
82 for the alignment (defaults to first conformation)
83 \param refCid ID of the conformation in the ref molecule to which
84 the alignment is computed (defaults to first conformation)
85 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
86 used to compute the alignments. If this mapping is
87 not specified an attempt is made to generate one by
88 substructure matching
89 \param weights Optionally specify weights for each of the atom pairs
90 \param reflect if true reflect the conformation of the probe molecule
91 \param maxIters maximum number of iterations used in minimizing the RMSD
92
93 <b>Returns</b>
94 RMSD value
95*/
96RDKIT_MOLALIGN_EXPORT double alignMol(
97 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
98 const MatchVectType *atomMap = nullptr,
99 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
100 unsigned int maxIters = 50);
101
102//! Compute the optimal RMS, transformation and atom map for aligning
103//! two molecules, taking symmetry into account. Molecule coordinates
104//! are left unaltered.
105/*!
106 This function will attempt to align all permutations of matching atom
107 orders in both molecules, for some molecules it will lead to 'combinatorial
108 explosion' especially if hydrogens are present.
109 Use 'RDKit::MolAlign::getAlignmentTransform' to align molecules
110 without changing the atom order.
111
112 \param prbMol the molecule to be aligned to the reference
113 \param refMol the reference molecule
114 \param bestTrans storage for the best computed transform
115 \param bestMatch storage for the MatchVectType corresponding to
116 the best match found.
117 \param prbCid (optional) probe conformation to use
118 \param refCid (optional) reference conformation to use
119 \param map (optional) a vector of vectors of pairs of atom IDs
120 (probe AtomId, ref AtomId) used to compute the alignments.
121 If not provided, these will be generated using a
122 substructure search.
123 \param maxMatches (optional) if map is empty, this will be the max number of
124 matches found in a SubstructMatch().
125 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
126 terminal functional groups (like nitro or carboxylate)
127 will be considered symmetrically
128 \param weights (optional) weights for each pair of atoms.
129 \param reflect if true reflect the conformation of the probe molecule
130 \param maxIters maximum number of iterations used in minimizing the RMSD
131 \param numThreads (optional) number of threads to use during the calculation
132
133 <b>Returns</b>
134 Best RMSD value found
135*/
136RDKIT_MOLALIGN_EXPORT double getBestAlignmentTransform(
137 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans,
138 MatchVectType &bestMatch, int prbCid = -1, int refCid = -1,
139 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
140 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
141 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
142 unsigned int maxIters = 50, int numThreads = 1);
143
144//! Returns the optimal RMS for aligning two molecules, taking
145/// symmetry into account. As a side-effect, the probe molecule is
146/// left in the aligned state.
147/*!
148 This function will attempt to align all permutations of matching atom
149 orders in both molecules, for some molecules it will lead to 'combinatorial
150 explosion' especially if hydrogens are present.
151 Use 'RDKit::MolAlign::alignMol' to align molecules without changing the
152 atom order.
153
154 \param prbMol the molecule to be aligned to the reference
155 \param refMol the reference molecule
157 \param prbCid (optional) probe conformation to use
158 \param refCid (optional) reference conformation to use
159 \param map (optional) a vector of vectors of pairs of atom IDs
160 (probe AtomId, ref AtomId) used to compute the alignments.
161 If not provided, these will be generated using a
162 substructure search.
163 \param maxMatches (optional) if map is empty, this will be the max number of
164 matches found in a SubstructMatch().
165 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
166 terminal functional groups (like nitro or carboxylate)
167 will be considered symmetrically
168 \param weights (optional) weights for each pair of atoms.
169 \param numThreads (optional) number of threads to use during the calculation
170
171 <b>Returns</b>
172 Best RMSD value found
173*/
174RDKIT_MOLALIGN_EXPORT double getBestRMS(
175 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
176 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
177 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
178 const RDNumeric::DoubleVector *weights = nullptr, int numThreads = 1);
179
180//! Returns the symmetric distance matrix between the conformers of a molecule.
181/// getBestRMS() is used to calculate the inter-conformer distances
182/*!
183 This function will attempt to align all permutations of matching atom
184 orders in both molecules, for some molecules it will lead to 'combinatorial
185 explosion' especially if hydrogens are present.
186
187 \param mol the molecule to be considered
188 \param numThreads (optional) number of threads to use during the calculation
189 \param map (optional) a vector of vectors of pairs of atom IDs
190 (probe AtomId, ref AtomId) used to compute the alignments.
191 If not provided, these will be generated using a
192 substructure search.
193 \param maxMatches (optional) if map is empty, this will be the max number of
194 matches found in a SubstructMatch().
195 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
196 terminal functional groups (like nitro or carboxylate)
197 will be considered symmetrically
198 \param weights (optional) weights for each pair of atoms.
199
200 <b>Returns</b>
201 a vector with the RMSD values stored in the order:
202 [(1,0), (2,0), (2,1), (3,0), (3,1), (3,2), ...]
203*/
204RDKIT_MOLALIGN_EXPORT std::vector<double> getAllConformerBestRMS(
205 const ROMol &mol, int numThreads = 1,
206 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
207 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
208 const RDNumeric::DoubleVector *weights = nullptr);
209
210//! Returns the RMS between two molecules, taking symmetry into account.
211//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
212//! probe molecules are not aligned to the reference ahead of the
213//! RMS calculation. This is useful, for example, to compute
214//! the RMSD between docking poses and the co-crystallized ligand.
215/*!
216 This function will attempt to match all permutations of matching atom
217 orders in both molecules, for some molecules it will lead to 'combinatorial
218 explosion' especially if hydrogens are present.
219
220 \param prbMol the molecule to be aligned to the reference
221 \param refMol the reference molecule
222 \param prbCid (optional) probe conformation to use
223 \param refCid (optional) reference conformation to use
224 \param map (optional) a vector of vectors of pairs of atom IDs
225 (probe AtomId, ref AtomId) used to compute the alignments.
226 If not provided, these will be generated using a
227 substructure search.
228 \param maxMatches (optional) if map is empty, this will be the max number of
229 matches found in a SubstructMatch().
230 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
231 terminal functional groups (like nitro or carboxylate) will
232 be considered symmetrically
233 \param weights (optional) weights for each pair of atoms.
234
235 <b>Returns</b>
236 Best RMSD value found
237*/
239 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
240 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
241 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
242 const RDNumeric::DoubleVector *weights = nullptr);
243
244//! Returns the RMS between two molecules, taking symmetry into account.
245//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
246//! probe molecules are not aligned to the reference ahead of the
247//! RMS calculation. This is useful, for example, to compute
248//! the RMSD between docking poses and the co-crystallized ligand.
249/*!
250 This function will attempt to match all permutations of matching atom
251 orders in both molecules, for some molecules it will lead to 'combinatorial
252 explosion' especially if hydrogens are present.
253
254 \param prbMol the molecule to be aligned to the reference
255 \param refMol the reference molecule
256 \param prbCid (optional) probe conformation to use
257 \param refCid (optional) reference conformation to use
258 \param map (optional) a vector of vectors of pairs of atom IDs
259 (probe AtomId, ref AtomId) used to compute the alignments.
260 If not provided, these will be generated using a
261 substructure search.
262 \param maxMatches (optional) if map is empty, this will be the max number of
263 matches found in a SubstructMatch().
264 \param weights (optional) weights for each pair of atoms.
265
266 <b>Returns</b>
267 Best RMSD value found
268*/
269RDKIT_MOLALIGN_EXPORT double CalcRMS(ROMol &prbMol, const ROMol &refMol,
270 int prbCid, int refCid,
271 const std::vector<MatchVectType> &map,
272 int maxMatches,
273 const RDNumeric::DoubleVector *weights);
274
275//! Align the conformations of a molecule using a common set of atoms. If
276/// the molecule contains queries, then the queries must also match exactly.
277
278/*!
279 \param mol The molecule of interest.
280 \param atomIds vector of atoms to be used to generate the alignment.
281 All atoms will be used if not specified
282 \param confIds vector of conformations to align - defaults to all
283 \param weights (optional) weights for each pair of atoms.
284 \param reflect toggles reflecting (about the origin) the alignment
285 \param maxIters the maximum number of iterations to attempt
286 \param RMSlist if non-null, this will be used to return the RMS values
287 between the reference conformation and the other aligned
288 conformations
289*/
290RDKIT_MOLALIGN_EXPORT void alignMolConformers(
291 ROMol &mol, const std::vector<unsigned int> *atomIds = nullptr,
292 const std::vector<unsigned int> *confIds = nullptr,
293 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
294 unsigned int maxIters = 50, std::vector<double> *RMSlist = nullptr);
295} // namespace MolAlign
296} // namespace RDKit
297#endif
~MolAlignException() noexcept override=default
MolAlignException(const char *msg)
construct with an error message
MolAlignException(const std::string msg)
construct with an error message
const char * what() const noexcept override
get the error message
#define RDKIT_MOLALIGN_EXPORT
Definition export.h:273
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)