npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
WeightedSampleAccumulator.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_WEIGHTEDSAMPLEACCUMULATOR_HH_
2 #define NPSTAT_WEIGHTEDSAMPLEACCUMULATOR_HH_
3 
4 /*!
5 // \file WeightedSampleAccumulator.hh
6 //
7 // \brief Accumulates a sample of weighted points and calculates various
8 // descriptive statistics
9 //
10 // Author: I. Volobouev
11 //
12 // October 2011
13 */
14 
15 #include <vector>
16 #include <cassert>
17 #include <utility>
18 
19 #include "geners/ClassId.hh"
20 #include "npstat/nm/PreciseType.hh"
21 
22 namespace npstat {
23  /**
24  // Accumulator of weighted points for the purpose of calculating
25  // statistical summaries in a numerically stable manner
26  */
27  template
28  <
29  typename Numeric,
30  typename Precise=typename PreciseType<Numeric>::type
31  >
33  {
34  public:
35  typedef Numeric value_type;
36  typedef Precise precise_type;
37 
38  inline WeightedSampleAccumulator() {reset();}
39 
40  /** Maximum weight among those accumulate so far */
41  inline double maxWeight() const {return maxWeight_;}
42 
43  /**
44  // This method returns the total number of times
45  // the "accumulate" method was called on individual points
46  // since the last reset (or since the object was created)
47  */
48  inline unsigned long ncalls() const {return callCount_;}
49 
50  /**
51  // This method returns the number of times the "accumulate"
52  // method was called with a positive weight
53  */
54  inline unsigned long nfills() const {return data_.size();}
55 
56  /**
57  // This method returns the effective number of counts
58  // which is (squared sum of weights)/(sum of squared weights)
59  */
60  double count() const;
61 
62  /** Minimum value in the accumulated sample */
63  Numeric min() const;
64 
65  /** Maximum value in the accumulated sample */
66  Numeric max() const;
67 
68  /** Accumulated sample average */
69  Precise mean() const;
70 
71  /** Estimate of the population standard deviation */
72  Precise stdev() const;
73 
74  /** Uncertainty of the population mean */
75  Precise meanUncertainty() const;
76 
77  /** Accumulated sample median */
78  Numeric median() const;
79 
80  /** Empirical cumulative distribuition function */
81  double cdf(Numeric value) const;
82 
83  /** Sum of weights below the given coordinate */
84  double weightBelow(Numeric value) const;
85 
86  /** Empirical quantile function */
87  Numeric quantile(double x) const;
88 
89  //@{
90  /** Standard plotting accessor */
91  inline Numeric location() const
92  {return median();}
93  inline Numeric rangeDown() const
94  {return median() - quantile(0.158655253931457051);}
95  inline Numeric rangeUp() const
96  {return quantile(0.841344746068542949) - median();}
97  //@}
98 
99  /**
100  // Robust scale corresponding to the standard deviation for
101  // points distributed according to the Gaussian distribution
102  */
103  inline Numeric sigmaRange() const
104  {return (quantile(0.841344746068542949) -
105  quantile(0.158655253931457051))/static_cast<Numeric>(2);}
106 
107  /**
108  // Accumulated sample average. Will not throw an exception if
109  // no data is accumulated. Instead, the provided function
110  // argument will be returned.
111  */
112  Precise noThrowMean(const Precise& valueIfNoData) const;
113 
114  /**
115  // Standard deviation estimate. Will not throw an exception if
116  // no data is accumulated. Instead, the provided function
117  // argument will be returned.
118  */
119  Precise noThrowStdev(const Precise& valueIfNoData) const;
120 
121  /**
122  // Uncertainty of the mean. Will not throw an exception if
123  // no data is accumulated. Instead, the provided function
124  // argument will be returned.
125  */
126  Precise noThrowMeanUncertainty(const Precise& valueIfNoData) const;
127 
128  /** Average weight (fills with 0 weight are ignored) */
129  double averageWeight() const;
130 
131  /** Sum of weights accumulated so far */
132  double sumOfWeights() const;
133 
134  /** Sum of squared weights accumulated so far */
135  double sumOfSquaredWeights() const;
136 
137  /**
138  // Note that the result returned by this method
139  // can be invalidated by a call to any non-const method.
140  // The data will include only the points with non-zero
141  // weights.
142  */
143  inline const std::pair<Numeric,double>* data() const
144  {return data_.empty() ? 0 : &data_[0];}
145 
146  /** Comparison for equality */
147  bool operator==(const WeightedSampleAccumulator& r) const;
148 
149  /** Logical negation of operator== */
150  bool operator!=(const WeightedSampleAccumulator& r) const;
151 
152  /**
153  // Accumulate the sample. Weights must be non-negative.
154  // Run-time error will be generated for negative weights.
155  */
156  void accumulate(const Numeric& value, double weight);
157 
158  /** Accumulate the sample from another accumulator */
160 
161  //@{
162  /**
163  // In this method, it is assumed that the first element
164  // of the pair is the value of type Numeric and the second
165  // is the weight which can be automatically converted to double
166  */
167  template<typename Pair>
168  void accumulate(const Pair* values, unsigned long n);
169 
170  template<typename Pair>
171  inline void accumulate(const Pair& pair)
172  {accumulate(pair.first, pair.second);}
173 
174  template<typename Pair>
175  inline WeightedSampleAccumulator& operator+=(const Pair& pair)
176  {accumulate(pair.first, pair.second); return *this;}
177  //@}
178 
179  /** Add the sample from another accumulator */
181  const WeightedSampleAccumulator& r)
182  {accumulate(r); return *this;}
183 
184  /**
185  // The effect of "operator*=" is the same as if all values
186  // were multiplied by "r" for each "accumulate" call
187  */
188  template<typename Num2>
190 
191  /**
192  // The effect of "operator/=" is the same as if all values
193  // were divided by "r" for each "accumulate" call
194  */
195  template<typename Num2>
197 
198  //@{
199  /*
200  // The binary operators are rather inefficient
201  // and should be avoided in tight loops
202  */
203  inline WeightedSampleAccumulator operator+(
204  const WeightedSampleAccumulator& r) const
205  {
206  WeightedSampleAccumulator acc(*this);
207  acc += r;
208  return acc;
209  }
210 
211  template<typename Num2>
212  inline WeightedSampleAccumulator operator*(const Num2& r) const
213  {
214  WeightedSampleAccumulator acc(*this);
215  acc *= r;
216  return acc;
217  }
218 
219  template<typename Num2>
220  inline WeightedSampleAccumulator operator/(const Num2& r) const
221  {
222  WeightedSampleAccumulator acc(*this);
223  acc /= r;
224  return acc;
225  }
226  //@}
227 
228  /**
229  // Scale the weights. The effect is the same as if all
230  // weights were multiplied by "r" for each "accumulate" call.
231  // Note that r can not be negative for this method.
232  */
234 
235  /** Clear all accumulated data */
236  void reset();
237 
238  /** Reserve memory for points to be accumulated in the future */
239  inline void reserve(const unsigned long n) {data_.reserve(n);}
240 
241  //@{
242  /** Method related to "geners" I/O */
243  inline gs::ClassId classId() const {return gs::ClassId(*this);}
244  bool write(std::ostream& of) const;
245  //@}
246 
247  static const char* classname();
248  static inline unsigned version() {return 1;}
249  static void restore(const gs::ClassId& id, std::istream& in,
250  WeightedSampleAccumulator* acc);
251  private:
252  void sort() const;
253  Precise stdev2(bool isMeanUncertainty) const;
254 
255  std::vector<std::pair<Numeric,double> > data_;
256  long double wsum_;
257  long double wsumsq_;
258  double maxWeight_;
259  unsigned long callCount_;
260  bool sorted_;
261 
262 #ifdef SWIG
263  public:
264  inline WeightedSampleAccumulator mul2(const double r) const
265  {return operator*(r);}
266 
267  inline WeightedSampleAccumulator div2(const double r) const
268  {return operator/(r);}
269 
270  inline WeightedSampleAccumulator& imul2(const double r)
271  {*this *= r; return *this;}
272 
273  inline WeightedSampleAccumulator& idiv2(const double r)
274  {*this /= r; return *this;}
275 
276  inline WeightedSampleAccumulator& operator+=(
277  const std::pair<Numeric,double>& pair)
278  {accumulate(pair.first, pair.second); return *this;}
279 
280  inline void accumulate(const std::pair<Numeric,double>* values,
281  const unsigned long n)
282  {
283  if (n)
284  {
285  assert(values);
286  data_.reserve(data_.size() + n);
287  for (unsigned long i=0; i<n; ++i)
288  accumulate(values[i].first, values[i].second);
289  }
290  }
291 
292  inline void accumulate(const std::vector<std::pair<Numeric,double> >& values)
293  {
294  const unsigned long n = values.size();
295  if (n)
296  {
297  data_.reserve(data_.size() + n);
298  for (unsigned long i=0; i<n; ++i)
299  accumulate(values[i].first, values[i].second);
300  }
301  }
302 #endif // SWIG
303  };
304 
305  typedef WeightedSampleAccumulator<float,long double> FloatWeightedSampleAccumulator;
306  typedef WeightedSampleAccumulator<double,long double> DoubleWeightedSampleAccumulator;
307 }
308 
309 #include "npstat/stat/WeightedSampleAccumulator.icc"
310 
311 #endif // NPSTAT_WEIGHTEDSAMPLEACCUMULATOR_HH_
Compile-time deduction of an appropriate precise numeric type.
Definition: WeightedSampleAccumulator.hh:33
void accumulate(const WeightedSampleAccumulator &acc)
double cdf(Numeric value) const
Precise noThrowStdev(const Precise &valueIfNoData) const
WeightedSampleAccumulator & scaleWeights(double r)
Numeric sigmaRange() const
Definition: WeightedSampleAccumulator.hh:103
Precise noThrowMeanUncertainty(const Precise &valueIfNoData) const
double weightBelow(Numeric value) const
Numeric location() const
Definition: WeightedSampleAccumulator.hh:91
void accumulate(const Numeric &value, double weight)
WeightedSampleAccumulator & operator+=(const WeightedSampleAccumulator &r)
Definition: WeightedSampleAccumulator.hh:180
double maxWeight() const
Definition: WeightedSampleAccumulator.hh:41
unsigned long nfills() const
Definition: WeightedSampleAccumulator.hh:54
WeightedSampleAccumulator & operator/=(const Num2 &r)
void accumulate(const Pair *values, unsigned long n)
void reserve(const unsigned long n)
Definition: WeightedSampleAccumulator.hh:239
gs::ClassId classId() const
Definition: WeightedSampleAccumulator.hh:243
Precise noThrowMean(const Precise &valueIfNoData) const
unsigned long ncalls() const
Definition: WeightedSampleAccumulator.hh:48
bool operator!=(const WeightedSampleAccumulator &r) const
WeightedSampleAccumulator & operator*=(const Num2 &r)
bool operator==(const WeightedSampleAccumulator &r) const
const std::pair< Numeric, double > * data() const
Definition: WeightedSampleAccumulator.hh:143
Numeric quantile(double x) const
Definition: AbsArrayProjector.hh:14