casacore
FitToHalfStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 //# $Id: Array.h 21545 2015-01-22 19:36:35Z gervandiepen $
26 
27 #ifndef SCIMATH_FITTOHALFSTATISTICS_H
28 #define SCIMATH_FITTOHALFSTATISTICS_H
29 
30 #include <casacore/casa/aips.h>
31 
32 #include <casacore/scimath/Mathematics/ConstrainedRangeStatistics.h>
33 #include <casacore/scimath/Mathematics/FitToHalfStatisticsData.h>
34 
35 namespace casacore {
36 
37 // Class to calculate statistics using the so-called fit to half algorithm. In this
38 // algorithm, a center value is specified, and only points greater or equal or less or equal
39 // this value are included. Furthermore, each of the included points is reflected about
40 // the center value, and these virtual points are added to the included points and
41 // the union of sets of included real points and virtual points are used for computing statistics.
42 // The specified center point is therefore the mean and median of the resulting
43 // distribution, and the total number of points is exactly twice the number of real
44 // data points that are included.
45 
46 template <class AccumType, class DataIterator, class MaskIterator=const Bool *, class WeightsIterator=DataIterator>
48  : public ConstrainedRangeStatistics<CASA_STATP> {
49 public:
50 
51  const static AccumType TWO;
52 
53  // <src>value</src> is only used if <src>center</src>=CVALUE
57  AccumType value=0
58  );
59 
60  virtual ~FitToHalfStatistics();
61 
62  // copy semantics
65  );
66 
67  // get the algorithm that this object uses for computing stats
70  };
71 
72  // The median is just the center value, so none of the parameters to this method are used.
73  AccumType getMedian(
74  CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
75  CountedPtr<AccumType> knownMax=NULL, uInt binningThreshholdSizeBytes=4096*4096,
76  Bool persistSortedArray=False, uInt64 nBins=10000
77  );
78 
79  // <group>
80  // In the following group of methods, if the size of the composite dataset
81  // is smaller than
82  // <src>binningThreshholdSizeBytes</src>, the composite dataset
83  // will be (perhaps partially) sorted and persisted in memory during the
84  // call. In that case, and if <src>persistSortedArray</src> is True, this
85  // sorted array will remain in memory after the call and will be used on
86  // subsequent calls of this method when <src>binningThreshholdSizeBytes</src>
87  // is greater than the size of the composite dataset. If
88  // <src>persistSortedArray</src> is False, the sorted array will not be
89  // stored after this call completes and so any subsequent calls for which the
90  // dataset size is less than <src>binningThreshholdSizeBytes</src>, the
91  // dataset will be sorted from scratch. Values which are not included due to
92  // non-unity strides, are not included in any specified ranges, are masked,
93  // or have associated weights of zero are not considered as dataset members
94  // for quantile computations.
95  // If one has a priori information regarding
96  // the number of points (npts) and/or the minimum and maximum values of the data
97  // set, these can be supplied to improve performance. Note however, that if these
98  // values are not correct, the resulting median
99  // and/or quantile values will also not be correct (although see the following notes regarding
100  // max/min). Note that if this object has already had getStatistics()
101  // called, and the min and max were calculated, there is no need to pass these values in
102  // as they have been stored internally and used (although passing them in shouldn't hurt
103  // anything). If provided, npts, the number of points falling in the specified ranges which are
104  // not masked and have weights > 0, should be exactly correct. <src>min</src> can be less than
105  // the true minimum, and <src>max</src> can be greater than the True maximum, but for best
106  // performance, these should be as close to the actual min and max as possible.
107 
108  AccumType getMedianAndQuantiles(
109  std::map<Double, AccumType>& quantiles, const std::set<Double>& fractions,
110  CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
111  CountedPtr<AccumType> knownMax=NULL,
112  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
113  uInt64 nBins=10000
114  );
115 
116  // get the median of the absolute deviation about the median of the data.
117  AccumType getMedianAbsDevMed(
118  CountedPtr<uInt64> knownNpts=NULL,
119  CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
120  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
121  uInt64 nBins=10000
122  );
123 
124  // Get the specified quantiles. <src>fractions</src> must be between 0 and 1,
125  // noninclusive.
126  std::map<Double, AccumType> getQuantiles(
127  const std::set<Double>& fractions, CountedPtr<uInt64> knownNpts=NULL,
128  CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
129  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
130  uInt64 nBins=10000
131  );
132  // </group>
133 
134  // scan the dataset(s) that have been added, and find the min and max.
135  // This method may be called even if setStatsToCaclulate has been called and
136  // MAX and MIN has been excluded.
137  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
138 
139  // scan the dataset(s) that have been added, and find the number of good points.
140  // This method may be called even if setStatsToCaclulate has been called and
141  // NPTS has been excluded. If setCalculateAsAdded(True) has previously been
142  // called after this object has been (re)initialized, an exception will be thrown.
143  uInt64 getNPts();
144 
145  // reset object to initial state. Clears all private fields including data,
146  // accumulators, global range. It does not affect the fence factor (_f), which was
147  // set at object construction.
148  virtual void reset();
149 
150  // This class does not allow statistics to be calculated as datasets are added, so
151  // an exception will be thrown if <src>c</src> is True.
152  void setCalculateAsAdded(Bool c);
153 
154 protected:
155 
156  virtual void _clearData();
157 
159 
161 
162  inline const StatsData<AccumType>& _getStatsData() const { return _statsData; }
163 
164  // <group>
165  // no weights, no mask, no ranges
166  void _unweightedStats(
167  uInt64& ngood, AccumType& mymin, AccumType& mymax,
168  Int64& minpos, Int64& maxpos,
169  const DataIterator& dataBegin, Int64 nr, uInt dataStride
170  );
171 
172  // no weights, no mask
173  void _unweightedStats(
174  uInt64& ngood, AccumType& mymin, AccumType& mymax,
175  Int64& minpos, Int64& maxpos,
176  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
177  const DataRanges& ranges, Bool isInclude
178  );
179 
180  void _unweightedStats(
181  uInt64& ngood, AccumType& mymin, AccumType& mymax,
182  Int64& minpos, Int64& maxpos,
183  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
184  const MaskIterator& maskBegin, uInt maskStride
185  );
186 
187  void _unweightedStats(
188  uInt64& ngood, AccumType& mymin, AccumType& mymax,
189  Int64& minpos, Int64& maxpos,
190  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
191  const MaskIterator& maskBegin, uInt maskStride,
192  const DataRanges& ranges, Bool isInclude
193  );
194  // </group>
195 
196  virtual void _updateMaxMin(
197  AccumType mymin, AccumType mymax, Int64 minpos,
198  Int64 maxpos, uInt dataStride, const Int64& currentDataset
199  );
200 
201  // <group>
202  // has weights, but no mask, no ranges
203  void _weightedStats(
204  AccumType& mymin, AccumType& mymax,
205  Int64& minpos, Int64& maxpos,
206  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
207  Int64 nr, uInt dataStride
208  );
209 
210  void _weightedStats(
211  AccumType& mymin, AccumType& mymax,
212  Int64& minpos, Int64& maxpos,
213  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
214  Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
215  );
216 
217  void _weightedStats(
218  AccumType& mymin, AccumType& mymax,
219  Int64& minpos, Int64& maxpos,
220  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
221  Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
222  );
223 
224  void _weightedStats(
225  AccumType& mymin, AccumType& mymax,
226  Int64& minpos, Int64& maxpos,
227  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
228  Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
229  const DataRanges& ranges, Bool isInclude
230  );
231  // </group>
232 
233 private:
236  AccumType _centerValue;
239  // these are the max and min for the real portion of the dataset
241 
242  void _getRealMinMax(
243  CountedPtr<AccumType>& realMin, CountedPtr<AccumType>& realMax,
245  );
246 
247  void _setRange();
248 };
249 
250 }
251 
252 #ifndef CASACORE_NO_AUTO_TEMPLATES
253 #include <casacore/scimath/Mathematics/FitToHalfStatistics.tcc>
254 #endif //# CASACORE_NO_AUTO_TEMPLATES
255 
256 #endif
virtual void reset()
reset object to initial state.
AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
The median is just the center value, so none of the parameters to this method are used...
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
StatsData< AccumType > & _getStatsData()
retreive stats structure.
void _weightedStats(AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
unsigned long long uInt64
Definition: aipsxtype.h:39
StatsData< AccumType > _statsData
void _unweightedStats(uInt64 &ngood, AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
USE_DATA
which section of data to use, greater than or less than the center value
FitToHalfStatistics(FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN, FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER, AccumType value=0)
value is only used if center=CVALUE
FitToHalfStatisticsData::CENTER _centerType
ALGORITHM
implemented algorithms
StatsData< AccumType > _getStatistics()
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
CENTER
choice of center point based on the corresponding statistics from the entire distribution of data...
AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
CountedPtr< AccumType > _realMin
Referenced counted pointer for constant data.
Definition: CountedPtr.h:86
void _getRealMinMax(CountedPtr< AccumType > &realMin, CountedPtr< AccumType > &realMax, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax)
virtual void _updateMaxMin(AccumType mymin, AccumType mymax, Int64 minpos, Int64 maxpos, uInt dataStride, const Int64 &currentDataset)
void setCalculateAsAdded(Bool c)
This class does not allow statistics to be calculated as datasets are added, so an exception will be ...
Class to calculate statistics using the so-called fit to half algorithm.
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
Abstract base class for statistics algorithms which are characterized by a range of good values...
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
const Bool False
Definition: aipstype.h:41
std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
CountedPtr< AccumType > _realMax
these are the max and min for the real portion of the dataset
const Double c
Fundamental physical constants (SI units):
const StatsData< AccumType > & _getStatsData() const
FitToHalfStatistics< CASA_STATP > & operator=(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
unsigned int uInt
Definition: aipstype.h:48