npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
arrayStats.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_ARRAYSTATS_HH_
2 #define NPSTAT_ARRAYSTATS_HH_
3 
4 /*!
5 // \file arrayStats.hh
6 //
7 // \brief Various descriptive statistics for 1-d and multidimensional arrays
8 //
9 // Author: I. Volobouev
10 //
11 // July 2010
12 */
13 
14 #include <vector>
15 #include <utility>
16 #include <stdexcept>
17 
18 #include "npstat/nm/BoxND.hh"
19 #include "npstat/nm/Matrix.hh"
20 
21 namespace npstat {
22  /**
23  // This function estimates population mean, standard
24  // deviation, skewness, and kurtosis. The array must have at least
25  // one element. A numerically sound two-pass algorithm is used.
26  //
27  // Any of the output pointers can be specified as NULL if the
28  // corresponding quantity is not needed (this might speed the code up).
29  */
30  template<typename Numeric>
31  void arrayStats(const Numeric* arr, unsigned long arrLen,
32  double* mean, double* stdev,
33  double* skewness=0, double* kurtosis=0);
34 
35  /** Function for calculating sample moments w.r.t. some point */
36  template<typename Numeric>
37  long double arrayMoment(const Numeric* arr, unsigned long arrLen,
38  long double center, unsigned order);
39 
40  /**
41  // Function for calculating multiple sample moments w.r.t. some
42  // point. The array "moments" should have at least "maxOrder+1"
43  // elements. Upon exit, moments[k] will be set to moment of order k.
44  */
45  template<typename Numeric>
46  void arrayMoments(const Numeric* arr, unsigned long arrLen,
47  long double center, unsigned maxOrder,
48  long double* moments);
49 
50  /**
51  // Function for calculating multiple sample cumulants (k-statistics).
52  // The array "cumulants" should have at least "maxOrder+1" elements.
53  // Upon exit, cumulants[k] will be set to the sample cumulant of order k.
54  // Currently, "maxOrder" argument can not exceed 6. Formulae for these
55  // can be found, for example, in "Kendall's Advanced Theory of Statistics"
56  // by Stuart and Ord.
57  */
58  template<typename Numeric>
59  void arrayCumulants(const Numeric* arr, unsigned long arrLen,
60  unsigned maxOrder, long double* cumulants);
61 
62  /**
63  // Function for calculating multiple sample central moments and
64  // estimating their uncertainties (if requested). The array "moments"
65  // should have at least "maxOrder+1" elements. If provided, the
66  // array "momentUncertainties" should also have at least "maxOrder+1"
67  // elements. Upon exit, moments[0] will be set to 1 and moments[1]
68  // will be set to the sample mean. For k > 1, moments[k] will be set
69  // to the central moment of order k. The corresponding uncertainties
70  // will be estimated approximately, to O(1/n), by substituting the
71  // sample moments for the population moments in the asymptotic
72  // formulae. The relevant formulae can be found, for example, in the
73  // monograph "Mathematical Methods of Statistics" by Harald Cramer.
74  */
75  template<typename Numeric>
76  void arrayCentralMoments(const Numeric* arr, unsigned long arrLen,
77  unsigned maxOrder, long double* moments,
78  long double* momentUncertainties = 0);
79 
80  /**
81  // Calculate the mean along each coordinate using array values as
82  // weights. The coordinate ranges are defined by the given box,
83  // and the array points are assumed to be in the centers of the bins,
84  // like in a histogram. The results are stored in the "mean" array
85  // whose length (given by lengthMean) must not be smaller than
86  // the rank of the array.
87  //
88  // For this function, the array dimensionality can not exceed
89  // CHAR_BIT*sizeof(unsigned long) which is normally 64 on 64-bit machines.
90  */
91  template<class Array>
92  void arrayCoordMean(const Array& a, const BoxND<double>& limits,
93  double* mean, unsigned lengthMean);
94 
95  /**
96  // Calculate the array covariance matrix. The "limits" argument has
97  // the same meaning as in the arrayCoordMean function.
98  */
99  template<class Array, unsigned Len>
100  void arrayCoordCovariance(const Array& a, const BoxND<double>& limits,
101  Matrix<double,Len>* covarianceMatrix);
102 
103  /**
104  // One-dimensional mean, variance, skewness, and kurtosis
105  // using array values as weights. Any of the argument pointers
106  // can be NULL in which case the corresponding quantity is not
107  // calculated. The code estimates population shape quantities
108  // rather than sample quantities (so that it can be best used
109  // with equidistantly binned histograms and such).
110  */
111  template<class Array>
112  void arrayShape1D(const Array& a, double xmin, double xmax,
113  double* mean, double* stdev,
114  double* skewness, double* kurtosis);
115 
116  /**
117  // Quantiles for 1-d arrays in which array values are used as weights.
118  // Essentially, the values are treated as histogram bin contents.
119  // All input "qvalues" should be between 0 and 1. The results are
120  // returned in the corresponding elements of the "quantiles" array.
121  // The function will work faster if "qvalues" are sorted in the
122  // increasing order.
123  //
124  // If you expect to call this function more than once using the same
125  // data, it is likely that you can obtain the same information more
126  // efficiently by creating the "BinnedDensity1D" object with its
127  // interpolationDegree argument set to 0 and then using its "quantile"
128  // method.
129  */
130  template<class Numeric>
131  void arrayQuantiles1D(const Numeric* data, unsigned long len,
132  double xmin, double xmax, const double* qvalues,
133  double* quantiles, unsigned nqvalues);
134 
135  /**
136  // This function returns negative sum of p_i ln(p_i) over array
137  // elements p_i, divided by the total number of array elements.
138  // All elements must be non-negative, and there must be at least
139  // one positive element present. Useful for calculating entropies
140  // of distributions represented on a grid. For example, mutual
141  // information of two variables is just the entropy of their copula.
142  */
143  template<class Numeric>
144  double arrayEntropy(const Numeric* p, unsigned long len,
145  bool normalize = false);
146 
147  /**
148  // This function assumes that Poisson distribution parameters are
149  // given in the "means" array while the array "counts" contains
150  // corresponding observations. "len" is the length of both "means"
151  // and "counts" arrays.
152  */
153  template<typename Real, typename Numeric>
154  double poissonLogLikelihood(const Real* means, const Numeric* counts,
155  unsigned long len);
156 
157 #ifdef SWIG
158  inline void arrayStats_2(const double* arr, unsigned long arrLen,
159  double* mean, double* stdev,
160  double* skewness, double* kurtosis)
161  {
162  arrayStats(arr, arrLen, mean, stdev, skewness, kurtosis);
163  }
164 
165  inline std::vector<double> arrayCumulants_2(
166  const double* arr, unsigned long arrLen, unsigned maxOrder)
167  {
168  std::vector<long double> vec(maxOrder+1U);
169  arrayCumulants(arr, arrLen, maxOrder, &vec[0]);
170  return std::vector<double>(vec.begin(), vec.end());
171  }
172 
173  inline std::vector<double> arrayMoments_2(
174  const double* arr, unsigned long arrLen,
175  double center, unsigned maxOrder)
176  {
177  std::vector<long double> vec(maxOrder+1U);
178  arrayMoments(arr, arrLen, center, maxOrder, &vec[0]);
179  return std::vector<double>(vec.begin(), vec.end());
180  }
181 
182  inline std::vector<std::pair<double,double> > arrayCentralMoments_2(
183  const double* arr, unsigned long arrLen, unsigned maxOrder)
184  {
185  std::vector<long double> buffer(2U*maxOrder+2U);
186  long double* moments = &buffer[0];
187  long double* uncerts = moments + (maxOrder + 1U);
188  arrayCentralMoments(arr, arrLen, maxOrder, moments, uncerts);
189  std::vector<std::pair<double,double> > result(maxOrder + 1U);
190  for (unsigned i=0; i<=maxOrder; ++i)
191  result[i] = std::pair<double,double>(moments[i], uncerts[i]);
192  return result;
193  }
194 
195  double poissonLogLikelihood_2(
196  const double* means, unsigned long meansLen,
197  const double* counts, unsigned long len)
198  {
199  if (meansLen != len) throw std::invalid_argument(
200  "In npstat::poissonLogLikelihood_2: incompatible input arrays");
201  return poissonLogLikelihood(means, counts, len);
202  }
203 #endif // SWIG
204 }
205 
206 #include "npstat/stat/arrayStats.icc"
207 
208 #endif // NPSTAT_ARRAYSTATS_HH_
Template to represent rectangles, boxes, and hyperboxes.
Template matrix class.
Definition: Matrix.hh:49
Definition: AbsArrayProjector.hh:14
double poissonLogLikelihood(const Real *means, const Numeric *counts, unsigned long len)
void arrayCoordMean(const Array &a, const BoxND< double > &limits, double *mean, unsigned lengthMean)
void arrayStats(const Numeric *arr, unsigned long arrLen, double *mean, double *stdev, double *skewness=0, double *kurtosis=0)
double arrayEntropy(const Numeric *p, unsigned long len, bool normalize=false)
void arrayMoments(const Numeric *arr, unsigned long arrLen, long double center, unsigned maxOrder, long double *moments)
void arrayCoordCovariance(const Array &a, const BoxND< double > &limits, Matrix< double, Len > *covarianceMatrix)
void arrayCentralMoments(const Numeric *arr, unsigned long arrLen, unsigned maxOrder, long double *moments, long double *momentUncertainties=0)
void arrayCumulants(const Numeric *arr, unsigned long arrLen, unsigned maxOrder, long double *cumulants)
void arrayShape1D(const Array &a, double xmin, double xmax, double *mean, double *stdev, double *skewness, double *kurtosis)
long double arrayMoment(const Numeric *arr, unsigned long arrLen, long double center, unsigned order)
void arrayQuantiles1D(const Numeric *data, unsigned long len, double xmin, double xmax, const double *qvalues, double *quantiles, unsigned nqvalues)