RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultithreadedMolSupplier.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Shrey Aryan
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifdef RDK_BUILD_THREADSAFE_SSS
11#ifndef MULTITHREADED_MOL_SUPPLIER
12#define MULTITHREADED_MOL_SUPPLIER
13
18#include <RDGeneral/RDLog.h>
19#include <RDGeneral/RDThreads.h>
20#include <RDGeneral/StreamOps.h>
21
22#include <atomic>
23#include <boost/tokenizer.hpp>
24
25#include "FileParsers.h"
26#include "MolSupplier.h"
27
28typedef boost::tokenizer<boost::char_separator<char>> tokenizer;
29
30namespace RDKit {
31class RDKIT_FILEPARSERS_EXPORT MultithreadedMolSupplier : public MolSupplier {
32 //! this is an abstract base class to concurrently supply molecules one at a
33 //! time
34 public:
35 MultithreadedMolSupplier() {}
36 ~MultithreadedMolSupplier() override;
37 //! pop elements from the output queue
38 ROMol *next() override;
39 //! returns true when all records have been read from the supplier
40 bool atEnd() override;
41
42 //! included for the interface, always returns false
43 bool getEOFHitOnRead() const { return false; }
44
45 //! returns the record id of the last extracted item
46 //! Note: d_LastRecordId = 0, initially therefore the value 0 is returned
47 //! if and only if the function is called before extracting the first
48 //! record
49 unsigned int getLastRecordId() const;
50 //! returns the text block for the last extracted item
51 std::string getLastItemText() const;
52
53 protected:
54 //! starts reader and writer threads
55 void startThreads();
56
57 private:
58 //! reads lines from input stream to populate the input queue
59 void reader();
60 //! parses lines from the input queue converting them to ROMol objects
61 //! populating the output queue
62 void writer();
63 //! finalizes the reader and writer threads
64 void endThreads();
65 //! disable automatic copy constructors and assignment operators
66 //! for this class and its subclasses. They will likely be
67 //! carrying around stream pointers and copying those is a recipe
68 //! for disaster.
69 MultithreadedMolSupplier(const MultithreadedMolSupplier &);
70 MultithreadedMolSupplier &operator=(const MultithreadedMolSupplier &);
71 //! not yet implemented
72 void reset() override;
73 void init() override = 0;
74 virtual bool getEnd() const = 0;
75 //! extracts next record from the input file or stream
76 virtual bool extractNextRecord(std::string &record, unsigned int &lineNum,
77 unsigned int &index) = 0;
78 //! processes the record into an ROMol object
79 virtual ROMol *processMoleculeRecord(const std::string &record,
80 unsigned int lineNum) = 0;
81
82 private:
83 std::atomic<unsigned int> d_threadCounter{1}; //!< thread counter
84 std::vector<std::thread> d_writerThreads; //!< vector writer threads
85 std::thread d_readerThread; //!< single reader thread
86
87 protected:
88 std::atomic<unsigned int> d_lastRecordId =
89 0; //!< stores last extracted record id
90 std::string d_lastItemText; //!< stores last extracted record
91 const unsigned int d_numReaderThread = 1; //!< number of reader thread
92 unsigned int d_numWriterThreads; //!< number of writer threads
93 size_t d_sizeInputQueue; //!< size of input queue
94 size_t d_sizeOutputQueue; //!< size of output queue
95
96 ConcurrentQueue<std::tuple<std::string, unsigned int, unsigned int>>
97 *d_inputQueue; //!< concurrent input queue
98 ConcurrentQueue<std::tuple<ROMol *, std::string, unsigned int>>
99 *d_outputQueue; //!< concurrent output queue
100};
101} // namespace RDKit
102#endif
103#endif
boost::tokenizer< boost::char_separator< char > > tokenizer
Definition LinkNode.h:18
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
Std stuff.
bool rdvalue_is(const RDValue_cast_t)