RDKit
Open-source cheminformatics and machine learning.
MolHash.h
Go to the documentation of this file.
1 // $Id$
2 //
3 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #pragma once
12 #include <map>
13 #include <vector>
14 #include <string>
15 #include <stdexcept>
16 #include <boost/cstdint.hpp>
17 #include "../RDKitBase.h"
18 
19 namespace RDKit
20 {
21  namespace MolHash
22  {
23  typedef boost::uint32_t HashCodeType;
24 
25  HashCodeType generateMoleculeHashCode(const ROMol &mol,
26  const std::vector<unsigned> *atomsToUse=0,
27  const std::vector<unsigned> *bondsToUse=0, // ?? listed bonds between/to/from excluded atom(s) ??
28  const std::vector<boost::uint32_t> *atomCodes =0,
29  const std::vector<boost::uint32_t> *bondCodes =0);
30 
// Bitwise flags that are OR-ed together to select which properties are
// folded into the atom codes (low byte) and bond codes (high byte).
enum CodeFlags
{
    CF_NO_LABELS          = 0x0000,

    // ---- atom flags: bits 0x0001 .. 0x0080 ----
    CF_ELEMENT            = 0x0001,
    CF_CHARGE             = 0x0002,
    CF_VALENCE            = 0x0004,
    CF_ISOTOPE            = 0x0008,
    CF_ATOM_CHIRALITY     = 0x0010,
    CF_ATOM_AROMATIC      = 0x0020,
    CF_ATOM_ALL           = 0x00FF,   // mask covering every atom flag bit

    // ---- bond flags: bits 0x0100 .. 0x8000 ----
    CF_BOND_ORDER         = 0x0100,   // ignore AROMATIZATION if corresponding flag is not specified
    CF_BOND_AROMATIZATION = 0x0200,   // must be defined: CF_BOND_TYPE_EXACT is built from it
    CF_BOND_TYPE_EXACT    = CF_BOND_ORDER | CF_BOND_AROMATIZATION,   // exact type value with aromatic
    CF_BOND_CHIRALITY     = 0x0400,   // include bond chirality information into bond code
    CF_BOND_IN_RING       = 0x0800,
    CF_BOND_ALL           = 0xFF00,   // mask covering every bond flag bit

    CF_ALL                = 0xFFFF    // CF_ATOM_ALL | CF_BOND_ALL
};
49 
50  void fillAtomBondCodes(const ROMol &mol, boost::uint64_t flags // CodeFlags constants combination
51  , std::vector<boost::uint32_t> *atomCodes // NULL is allowed
52  , std::vector<boost::uint32_t> *bondCodes); // NULL is allowed
53 
54 #pragma pack(push,1)
55  struct HashSet
56  {
57  boost::uint16_t Version;
58  boost::uint16_t Reserved;
59  boost::uint16_t NumAtoms;
60  boost::uint16_t NumBonds;
61  boost::uint32_t FormulaCRC32;
62  HashCodeType NonChiralAtomsHash;
63  HashCodeType NonChiralBondsHash;
64  HashCodeType ChiralAtomsHash;
65  HashCodeType ChiralBondsHash;
66  HashCodeType ChiralityHash;
67  public:
68  HashSet() { memset(this, 0, sizeof(*this));}
69  };
70 #pragma pack(pop)
71 
72  void generateMoleculeHashSet(const ROMol &mol, HashSet& res,
73  const std::vector<unsigned> *atomsToUse=0,
74  const std::vector<unsigned> *bondsToUse=0);
75 
76  std::string generateMoleculeHashSet(const ROMol &mol,
77  const std::vector<unsigned> *atomsToUse=0,
78  const std::vector<unsigned> *bondsToUse=0);
79 
// Converts a block of binary data into a Base64 encoded string.
//
//   bin  - pointer to the first byte of the data
//   size - number of bytes to encode
std::string encode(const void* bin,
                   size_t size);
81 
82 }}
HashCodeType NonChiralBondsHash
Definition: MolHash.h:63
HashCodeType ChiralBondsHash
Definition: MolHash.h:65
boost::uint16_t NumBonds
Definition: MolHash.h:60
boost::uint32_t FormulaCRC32
Definition: MolHash.h:61
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
HashCodeType ChiralityHash
Definition: MolHash.h:66
HashCodeType NonChiralAtomsHash
Definition: MolHash.h:62
boost::uint16_t Reserved
Definition: MolHash.h:58
void generateMoleculeHashSet(const ROMol &mol, HashSet &res, const std::vector< unsigned > *atomsToUse=0, const std::vector< unsigned > *bondsToUse=0)
HashCodeType ChiralAtomsHash
Definition: MolHash.h:64
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
boost::uint16_t NumAtoms
Definition: MolHash.h:59
boost::uint32_t HashCodeType
Definition: MolHash.h:23
HashCodeType generateMoleculeHashCode(const ROMol &mol, const std::vector< unsigned > *atomsToUse=0, const std::vector< unsigned > *bondsToUse=0, const std::vector< boost::uint32_t > *atomCodes=0, const std::vector< boost::uint32_t > *bondCodes=0)
void fillAtomBondCodes(const ROMol &mol, boost::uint64_t flags, std::vector< boost::uint32_t > *atomCodes, std::vector< boost::uint32_t > *bondCodes)
boost::uint16_t Version
Definition: MolHash.h:57
std::string encode(const void *bin, size_t size)