npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
SampleAccumulator.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_SAMPLEACCUMULATOR_HH_
2 #define NPSTAT_SAMPLEACCUMULATOR_HH_
3 
4 /*!
5 // \file SampleAccumulator.hh
6 //
7 // \brief Accumulates a sample and calculates various descriptive statistics
8 //
9 // Author: I. Volobouev
10 //
11 // July 2010
12 */
13 
14 #include <vector>
15 
16 #include "geners/ClassId.hh"
17 #include "npstat/nm/PreciseType.hh"
18 
19 namespace npstat {
20  /**
21  // Accumulator of values (or items) for the purpose of calculating
22  // statistical summaries in a numerically stable manner.
23  //
24  // Note that calculating quantiles using "median", "cdf", and
25  // "quantile" methods makes subsequent calls to the "cov" and "corr"
26  // methods impossible (a dynamic fault will be generated). This is because
27  // the data is sorted internally for quantile calculations, and subsequent
28  // covariance calculation with sorted samples will return a result which
29  // is most likely not intended.
30  */
31  template
32  <
33  typename Numeric,
34  typename Precise=typename PreciseType<Numeric>::type
35  >
37  {
38  public:
39  typedef Numeric value_type;
40  typedef Precise precise_type;
41 
42  inline SampleAccumulator() : sorted_(true), neverSorted_(true) {}
43 
44  /** Number of items in the accumulated sample */
45  inline unsigned long count() const {return data_.size();}
46 
47  /** Minimum value in the accumulated sample */
48  Numeric min() const;
49 
50  /** Maximum value in the accumulated sample */
51  Numeric max() const;
52 
53  /** Accumulated sample average */
54  Precise mean() const;
55 
56  /** Estimate of the population standard deviation */
57  Precise stdev() const;
58 
59  /** Uncertainty of the population mean */
60  Precise meanUncertainty() const;
61 
62  /** Accumulated sample median */
63  Numeric median() const;
64 
65  /** Empirical cumulative distribuition function */
66  double cdf(Numeric value) const;
67 
68  /** Exact number of points below the given value */
69  unsigned long nBelow(Numeric value) const;
70 
71  /** Empirical quantile function */
72  Numeric quantile(double x) const;
73 
74  //@{
75  /** Standard plotting accessor */
76  inline Numeric location() const
77  {return median();}
78  inline Numeric rangeDown() const
79  {return median() - quantile(0.158655253931457051);}
80  inline Numeric rangeUp() const
81  {return quantile(0.841344746068542949) - median();}
82  //@}
83 
84  /**
85  // Robust scale corresponding to the standard deviation for
86  // points distributed according to the Gaussian distribution
87  */
88  inline Numeric sigmaRange() const
89  {return (quantile(0.841344746068542949) -
90  quantile(0.158655253931457051))/static_cast<Numeric>(2);}
91 
92  /**
93  // Accumulated sample average. Will not throw an exception if
94  // no data is accumulated. Instead, the provided function
95  // argument will be returned.
96  */
97  Precise noThrowMean(const Precise& valueIfNoData) const;
98 
99  /**
100  // Standard deviation estimate. Will not throw an exception if
101  // no data is accumulated. Instead, the provided function
102  // argument will be returned.
103  */
104  Precise noThrowStdev(const Precise& valueIfNoData) const;
105 
106  /**
107  // Uncertainty of the mean. Will not throw an exception if
108  // no data is accumulated. Instead, the provided function
109  // argument will be returned.
110  */
111  Precise noThrowMeanUncertainty(const Precise& valueIfNoData) const;
112 
113  /**
114  // Note that the result returned by this method
115  // may be invalidated by a call to any non-const method
116  */
117  inline const Numeric* data() const
118  {return data_.empty() ? 0 : &data_[0];}
119 
120  //@{
121  /**
122  // Covariance and correlation with another accumulator.
123  // That other accumulator must have the same number of
124  // points stored. Note that "cov" and "corr" methods
125  // typically make sense only for the original, unmodified
126  // sequences of data values. If the data points have been
127  // sorted (which happens internally whenever one of the
128  // methods "median", "cdf", "quantile", or "nBelow" is called),
129  // covariance and correlation can produce rather unexpected
130  // results. To prevent any confusion, this class will throw
131  // an exception of type std::runtime_error in case "cov"
132  // or "corr" is called after the data in any of the two
133  // accumulators has been sorted. Basically, if you plan
134  // to calculate covariances between accumulators, do it
135  // before calculating any quantiles or cdf values.
136  */
137  template <typename Numeric2, typename Precise2>
138  Precise cov(const SampleAccumulator<Numeric2,Precise2>& other) const;
139 
140  template <typename Num2, typename Prec2>
141  inline Precise corr(const SampleAccumulator<Num2,Prec2>& other) const
142  {return cov(other)/stdev()/other.stdev();}
143  //@}
144 
145  /** Comparison for equality */
146  bool operator==(const SampleAccumulator& r) const;
147 
148  /** Logical negation of operator== */
149  bool operator!=(const SampleAccumulator& r) const;
150 
151  //@{
152  /** Accumulate the sample */
153  inline void accumulate(const Numeric& value)
154  {data_.push_back(value); sorted_ = false;}
155  inline SampleAccumulator& operator+=(const Numeric& r)
156  {accumulate(r); return *this;}
157  //@}
158 
159  /** Accumulate more than one value at a time */
160  void accumulate(const Numeric* values, unsigned long n);
161 
162  //@{
163  /** Accumulate the sample from another accumulator */
164  void accumulate(const SampleAccumulator& acc);
165  inline SampleAccumulator& operator+=(const SampleAccumulator& r)
166  {accumulate(r); return *this;}
167  //@}
168 
169  /** Multiply every entry by a constant */
170  template<typename Num2>
171  SampleAccumulator& operator*=(const Num2& r);
172 
173  /** Divide every entry by a constant */
174  template<typename Num2>
175  SampleAccumulator& operator/=(const Num2& r);
176 
177  //@{
178  /*
179  // The binary operators are rather inefficient
180  // and should be avoided in tight loops
181  */
182  inline SampleAccumulator operator+(const SampleAccumulator& r) const
183  {
184  SampleAccumulator acc(*this);
185  acc += r;
186  return acc;
187  }
188 
189  template<typename Num2>
190  inline SampleAccumulator operator*(const Num2& r) const
191  {
192  SampleAccumulator acc(*this);
193  acc *= r;
194  return acc;
195  }
196 
197  template<typename Num2>
198  inline SampleAccumulator operator/(const Num2& r) const
199  {
200  SampleAccumulator acc(*this);
201  acc /= r;
202  return acc;
203  }
204  //@}
205 
206  /** Clear all accumulated data */
207  inline void reset() {data_.clear(); sorted_=true; neverSorted_=true;}
208 
209  /** Reserve memory for data to be accumulated in the future */
210  inline void reserve(const unsigned long n) {data_.reserve(n);}
211 
212  //@{
213  /** Method related to "geners" I/O */
214  inline gs::ClassId classId() const {return gs::ClassId(*this);}
215  bool write(std::ostream& of) const;
216  //@}
217 
218  static const char* classname();
219  static inline unsigned version() {return 1;}
220  static void restore(const gs::ClassId& id, std::istream& in,
221  SampleAccumulator* acc);
222  private:
223  void sort() const;
224 
225  std::vector<Numeric> data_;
226  bool sorted_;
227  bool neverSorted_;
228 
229 #ifdef SWIG
230  public:
231  inline SampleAccumulator mul2(const double r) const
232  {return operator*(r);}
233 
234  inline SampleAccumulator div2(const double r) const
235  {return operator/(r);}
236 
237  inline SampleAccumulator& imul2(const double r)
238  {*this *= r; return *this;}
239 
240  inline SampleAccumulator& idiv2(const double r)
241  {*this /= r; return *this;}
242 
243  inline double cov2(const SampleAccumulator& other) const
244  {return cov(other);}
245 
246  inline double corr2(const SampleAccumulator& other) const
247  {return corr(other);}
248 
249  inline double mean2() const
250  {return mean();}
251 
252  inline double stdev2() const
253  {return stdev();}
254 
255  inline double meanUncertainty2() const
256  {return meanUncertainty();}
257 
258  inline double noThrowMean2(double d) const
259  {return noThrowMean(d);}
260 
261  inline double noThrowStdev2(double d) const
262  {return noThrowStdev(d);}
263 
264  inline double noThrowMeanUncertainty2(double d) const
265  {return noThrowMeanUncertainty(d);}
266 #endif // SWIG
267  };
268 
269  typedef SampleAccumulator<float,long double> FloatSampleAccumulator;
270  typedef SampleAccumulator<double,long double> DoubleSampleAccumulator;
271 }
272 
273 #include "npstat/stat/SampleAccumulator.icc"
274 
275 #endif // NPSTAT_SAMPLEACCUMULATOR_HH_
Compile-time deduction of an appropriate precise numeric type.
Definition: SampleAccumulator.hh:37
void accumulate(const Numeric *values, unsigned long n)
Numeric median() const
void accumulate(const Numeric &value)
Definition: SampleAccumulator.hh:153
unsigned long nBelow(Numeric value) const
void reserve(const unsigned long n)
Definition: SampleAccumulator.hh:210
Precise noThrowMean(const Precise &valueIfNoData) const
Numeric sigmaRange() const
Definition: SampleAccumulator.hh:88
bool operator==(const SampleAccumulator &r) const
unsigned long count() const
Definition: SampleAccumulator.hh:45
double cdf(Numeric value) const
gs::ClassId classId() const
Definition: SampleAccumulator.hh:214
SampleAccumulator & operator/=(const Num2 &r)
void accumulate(const SampleAccumulator &acc)
const Numeric * data() const
Definition: SampleAccumulator.hh:117
bool operator!=(const SampleAccumulator &r) const
Precise noThrowStdev(const Precise &valueIfNoData) const
Numeric quantile(double x) const
Precise noThrowMeanUncertainty(const Precise &valueIfNoData) const
Precise meanUncertainty() const
void reset()
Definition: SampleAccumulator.hh:207
Precise cov(const SampleAccumulator< Numeric2, Precise2 > &other) const
Numeric location() const
Definition: SampleAccumulator.hh:76
SampleAccumulator & operator*=(const Num2 &r)
Definition: AbsArrayProjector.hh:14