11 #ifndef __RD_SPARSE_INT_VECT_20070921__ 12 #define __RD_SPARSE_INT_VECT_20070921__ 20 #include <boost/cstdint.hpp> 26 template <
typename IndexType>
38 d_length=other.d_length;
39 d_data.insert(other.d_data.begin(),other.d_data.end());
44 initFromText(pkl.c_str(),pkl.size());
48 initFromText(pkl,len);
55 #pragma clang diagnostic push 56 #pragma clang diagnostic ignored "-Wtautological-compare" 60 if(idx<0||idx>=d_length){
64 typename StorageType::const_iterator iter=d_data.find(idx);
65 if(iter!=d_data.end()){
73 if(idx<0||idx>=d_length){
83 #pragma clang diagnostic pop 95 typename StorageType::const_iterator iter;
96 for(iter=d_data.begin();iter!=d_data.end();++iter){
97 if(!doAbs) res+=iter->second;
98 else res+=abs(iter->second);
117 if(other.d_length!=d_length){
121 typename StorageType::iterator iter=d_data.begin();
122 typename StorageType::const_iterator oIter=other.d_data.begin();
123 while(iter!=d_data.end()){
125 while(oIter!=other.d_data.end() && oIter->first < iter->first){
128 if(oIter!=other.d_data.end() && oIter->first==iter->first){
130 if(oIter->second<iter->second){
131 iter->second=oIter->second;
138 typename StorageType::iterator tmpIter=iter;
157 if(other.d_length!=d_length){
161 typename StorageType::iterator iter=d_data.begin();
162 typename StorageType::const_iterator oIter=other.d_data.begin();
163 while(iter!=d_data.end()){
165 while(oIter!=other.d_data.end() &&
166 oIter->first < iter->first){
167 d_data[oIter->first]=oIter->second;
170 if(oIter!=other.d_data.end() && oIter->first==iter->first){
172 if(oIter->second>iter->second){
173 iter->second=oIter->second;
180 while(oIter!=other.d_data.end()){
181 d_data[oIter->first]=oIter->second;
194 if(other.d_length!=d_length){
197 typename StorageType::iterator iter=d_data.begin();
198 typename StorageType::const_iterator oIter=other.d_data.begin();
199 while(oIter!=other.d_data.end()){
200 while(iter!=d_data.end() &&
201 iter->first < oIter->first){
204 if(iter!=d_data.end() && oIter->first==iter->first){
206 iter->second+=oIter->second;
208 typename StorageType::iterator tIter=iter;
216 d_data[oIter->first]=oIter->second;
230 if(other.d_length!=d_length){
233 typename StorageType::iterator iter=d_data.begin();
234 typename StorageType::const_iterator oIter=other.d_data.begin();
235 while(oIter!=other.d_data.end()){
236 while(iter!=d_data.end() &&
237 iter->first < oIter->first){
240 if(iter!=d_data.end() && oIter->first==iter->first){
242 iter->second-=oIter->second;
244 typename StorageType::iterator tIter=iter;
252 d_data[oIter->first] = -oIter->second;
265 typename StorageType::iterator iter=d_data.begin();
266 while(iter!=d_data.end()){
279 typename StorageType::iterator iter=d_data.begin();
280 while(iter!=d_data.end()){
293 typename StorageType::iterator iter=d_data.begin();
294 while(iter!=d_data.end()){
307 typename StorageType::iterator iter=d_data.begin();
308 while(iter!=d_data.end()){
321 if(d_length!=v2.d_length){
324 return d_data==v2.d_data;
332 std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
333 boost::uint32_t tInt;
336 tInt=
sizeof(IndexType);
339 IndexType nEntries=d_data.size();
342 typename StorageType::const_iterator iter=d_data.begin();
343 while(iter!=d_data.end()){
345 boost::int32_t tInt=iter->second;
353 initFromText(txt.c_str(),txt.length());
360 void initFromText(
const char *pkl,
const unsigned int len) {
362 std::stringstream ss(std::ios_base::binary|std::ios_base::out|std::ios_base::in);
365 boost::uint32_t vers;
368 boost::uint32_t tInt;
370 if(tInt>
sizeof(IndexType)){
371 throw ValueErrorException(
"IndexType cannot accomodate index size in SparseIntVect pickle");
375 readVals<unsigned char>(ss);
break;
376 case sizeof(boost::int32_t):
377 readVals<boost::uint32_t>(ss);
break;
378 case sizeof(boost::int64_t):
379 readVals<boost::uint64_t>(ss);
break;
387 template <
typename T>
388 void readVals(std::stringstream &ss){
389 PRECONDITION(
sizeof(T)<=
sizeof(IndexType),
"invalid size");
395 for(T i=0;i<nEntries;++i){
404 template <
typename IndexType,
typename SequenceType>
406 const SequenceType &seq){
407 typename SequenceType::const_iterator seqIt;
408 for(seqIt=seq.begin();seqIt!=seq.end();++seqIt){
410 IndexType idx=*seqIt;
416 template <
typename IndexType>
419 double &v1Sum,
double &v2Sum,
424 v1Sum=v2Sum=andSum=0.0;
438 if(iter2->first == iter1->first){
439 if(abs(iter2->second)<abs(iter1->second)){
440 andSum += abs(iter2->second);
442 andSum += abs(iter1->second);
456 v1Sum+=abs(iter1->second);
463 v2Sum+=abs(iter2->second);
470 template <
typename IndexType>
473 bool returnDistance=
false,
480 if(!returnDistance && bounds>0.0){
483 double denom=v1Sum+v2Sum;
484 if(fabs(denom)<1e-6){
491 double minV=v1Sum<v2Sum?v1Sum:v2Sum;
492 if(2.*minV/denom<bounds){
501 calcVectParams(v1,v2,v1Sum,v2Sum,numer);
503 double denom=v1Sum+v2Sum;
505 if(fabs(denom)<1e-6){
510 if(returnDistance) sim = 1.-sim;
516 template <
typename IndexType>
520 bool returnDistance=
false,
529 calcVectParams(v1,v2,v1Sum,v2Sum,andSum);
531 double denom=a*v1Sum+b*v2Sum+(1-a-b)*andSum;
534 if(fabs(denom)<1e-6){
539 if(returnDistance) sim = 1.-sim;
544 template <
typename IndexType>
547 bool returnDistance=
false,
std::string toString() const
returns a binary string representation (pickle)
double DiceSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
SparseIntVect(const char *pkl, const unsigned int len)
constructor from a pickle
void updateFromSequence(SparseIntVect< IndexType > &vect, const SequenceType &seq)
const SparseIntVect< IndexType > operator+(const SparseIntVect< IndexType > &other) const
const int ci_SPARSEINTVECT_VERSION
version number to use in pickles
std::map< IndexType, int > StorageType
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
SparseIntVect< IndexType > & operator&=(const SparseIntVect< IndexType > &other)
~SparseIntVect()
destructor (doesn't need to do anything)
int getVal(IndexType idx) const
return the value at an index
const SparseIntVect< IndexType > operator&(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator*(int v)
SparseIntVect(const SparseIntVect< IndexType > &other)
Copy constructor.
unsigned int size() const
returns the length
double TanimotoSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
const SparseIntVect< IndexType > operator-(const SparseIntVect< IndexType > &other) const
bool operator!=(const SparseIntVect< IndexType > &v2) const
SparseIntVect< IndexType > & operator/=(int v)
SparseIntVect< IndexType > & operator/(int v)
Includes a bunch of functionality for handling Atom and Bond queries.
bool operator==(const SparseIntVect< IndexType > &v2) const
SparseIntVect< IndexType > & operator-=(const SparseIntVect< IndexType > &other)
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
const StorageType & getNonzeroElements() const
returns our nonzero elements as a map(IndexType->int)
SparseIntVect(const std::string pkl)
constructor from a pickle
SparseIntVect(IndexType length)
initialize with a particular length
SparseIntVect< IndexType > & operator*=(int v)
const SparseIntVect< IndexType > operator|(const SparseIntVect< IndexType > &other) const
double TverskySimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, double a, double b, bool returnDistance=false, double bounds=0.0)
int getTotalVal(bool doAbs=false) const
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
#define PRECONDITION(expr, mess)
a class for efficiently storing sparse vectors of ints
void setVal(IndexType idx, int val)
set the value at an index
SparseIntVect< IndexType > & operator+=(const SparseIntVect< IndexType > &other)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
SparseIntVect< IndexType > & operator|=(const SparseIntVect< IndexType > &other)
IndexType getLength() const
returns the length
int operator[](IndexType idx) const
support indexing using []
void fromString(const std::string &txt)