npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
StatUtils.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_STATUTILS_HH_
2 #define NPSTAT_STATUTILS_HH_
3 
4 /*!
5 // \file StatUtils.hh
6 //
7 // \brief Statistical utilities which did not end up in dedicated headers
8 //
9 // Author: I. Volobouev
10 //
11 // March 2010
12 */
13 
14 #include <vector>
15 
16 namespace npstat {
17  /**
18  // Calculates an empirical quantile from a _sorted_ vector of data points.
19  // "data" is the sorted sample; "x" is the cdf level at which the quantile
20  // is wanted (presumably 0.0 <= x <= 1.0 -- TODO confirm in StatUtils.icc).
21  // If "increaseRange" is "true" (the default is "false"), the value returned
22  // for x = 0.0 will be smaller than data[0] and the value returned for x = 1.0 will be larger than the largest data value.
23  */
24  template<typename Data>
25  Data empiricalQuantile(const std::vector<Data>& data, double x,
26  bool increaseRange=false);
27 
28  /**
29  // Empirical cdf of the sample "data" evaluated at the point "x". This is the inverse
30  // of npstat::empiricalQuantile (if there are no duplicate entries in the data). The data vector must be sorted.
31  */
32  template<typename Data>
33  double empiricalCdf(const std::vector<Data>& data, const Data& x);
34 
35  /**
36  // Returns the fraction of the sample at or below "x": the number of
37  // points in "data" with values below or equal to x, divided by the
38  // data size. The data vector must be sorted.
39  */
40  template<typename Data>
41  double simpleEmpiricalCdf(const std::vector<Data>& data, const Data& x);
42 
43  /**
44  // Finds the bin number corresponding to the given cdf value in the array
45  // "cdf" of length "arrLen", which represents a cumulative distribution function
46  // (the numbers in the array must increase). It is expected that the "cdfValue" input is between cdf[0] and cdf[arrLen-1].
47  // "remainder" is an optional output pointer (null by default); presumably it receives the fractional position inside the bin -- TODO confirm in StatUtils.icc.
48  */
49  template<typename Data>
50  unsigned long quantileBinFromCdf(const Data* cdf, unsigned long arrLen,
51  Data cdfValue, Data* remainder = 0);
52 
53  /**
54  // Returns the mathematical functional R(d^n f(x)/d x^n), i.e., the integral of the squared n-th derivative of f(x).
55  // The function f(x) is given by its tabulated values (array "fvalues", length "arrLen") on a grid
56  // with constant distance "h" between points (it is assumed that each
57  // value is given in the middle of a cell, like in a histogram). The
58  // functional R(y(x)) is, by definition, the integral of y(x) squared,
59  // and d^n f(x)/d x^n is the derivative of order "n".
60  //
61  // Note that the table of function values is NOT preserved: pass a copy of "fvalues" if the original table is still needed afterwards.
62  */
63  template<typename Real>
64  Real squaredDerivativeIntegral(Real* fvalues, unsigned long arrLen,
65  unsigned n, Real h);
66 
67  /**
68  // This function modifies the input array "arr" (length "arrLen") in place: it sets all negative elements to zero
69  // and normalizes the array so that the sum of the elements times the "binwidth"
70  // argument becomes 1. If the input array is nowhere positive,
71  // std::runtime_error is thrown. "true" is returned in case any negative
72  // array elements are found, otherwise the function returns "false".
73  // Upon exit (and if the "normfactor" pointer is not NULL; it is NULL by default), the value of
74  // *normfactor is set to the factor by which array elements are multiplied
75  // so that they become normalized.
76  */
77  template<typename Real>
78  bool normalizeArrayAsDensity(Real* arr, unsigned long arrLen,
79  double binwidth, double* normfactor=0);
80 
81  /** Akaike information criterion corrected for the sample size (AICc). Presumably "ndof" is the number of fitted parameters, "logli" is the log-likelihood, and "n" is the sample size -- TODO confirm against the implementation. */
82  double aicc(const double ndof, const double logli, const double n);
83 
84  /**
85  // Distribution of the Anderson-Darling test statistic; presumably returns the cdf value at "z" for sample size "n" -- TODO confirm.
86  // The code comes from "Evaluating the Anderson-Darling Distribution" by G. Marsaglia and
87  // J. Marsaglia, Journal of Statistical Software, vol. 9, issue 2,
88  // pp. 1-5 (2004).
89  */
90  double AD(const double n, const double z);
91 
92 #ifdef SWIG // The wrapper below exists only when this header is processed with SWIG defined.
93  inline bool normalizeArrayAsDensity_2(double* pyarr, unsigned long arrLen,
94  double binwidth, double *OUTPUT) // NOTE(review): the name "OUTPUT" presumably selects SWIG's output-argument typemap -- confirm before renaming
95  { // Non-template entry point for double arrays: forwards all four arguments to normalizeArrayAsDensity and returns its result.
96  return normalizeArrayAsDensity(pyarr, arrLen, binwidth, OUTPUT);
97  }
98 #endif // SWIG
99 }
100 
101 #include "npstat/stat/StatUtils.icc"
102 
103 #endif // NPSTAT_STATUTILS_HH_
Definition: AbsArrayProjector.hh:14
double simpleEmpiricalCdf(const std::vector< Data > &data, const Data &x)
unsigned long quantileBinFromCdf(const Data *cdf, unsigned long arrLen, Data cdfValue, Data *remainder=0)
double empiricalCdf(const std::vector< Data > &data, const Data &x)
Data empiricalQuantile(const std::vector< Data > &data, double x, bool increaseRange=false)
bool normalizeArrayAsDensity(Real *arr, unsigned long arrLen, double binwidth, double *normfactor=0)
Real squaredDerivativeIntegral(Real *fvalues, unsigned long arrLen, unsigned n, Real h)
double AD(const double n, const double z)
double aicc(const double ndof, const double logli, const double n)