npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
amiseOptimalBandwidth.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_AMISEOPTIMALBANDWIDTH_HH_
2 #define NPSTAT_AMISEOPTIMALBANDWIDTH_HH_
3 
4 /*!
5 // \file amiseOptimalBandwidth.hh
6 //
7 // \brief Optimal AMISE bandwidth for KDE with high order kernels
8 //
9 // AMISE stands for Asymptotic Mean Integrated Squared Error.
10 // The formulae used in this code come from the paper "Bandwidth
11 // Selection in Kernel Density Estimation: A Review" by B.A. Turlach.
12 //
13 // Author: I. Volobouev
14 //
15 // July 2010
16 */
17 
18 namespace npstat {
19  /**
20  // Estimate optimal bandwidth for filters generated by the Gaussian
21  // distribution used as the weight function. The arguments are as follows:
22  //
23  // filterDegree -- Degree of the filter (in the LOrPE sense). This
24  // degree is less by 2 than the order of the kernel
25  // in the Turlach's paper. For example, filter of
26  // degree 0 corresponds to kernel of order 2 (original
27  // Gaussian), filter of degree 2 corresponds to kernel
28  // of order 4, etc. Here, the degree must be even.
29  // Maximum possible filter degree is 42.
30  //
31  // npoints -- Number of points in the data sample.
32  //
33  // fvalues -- Array of scanned values of the reference density.
34  // Note that the contents of this array are NOT preserved.
35  //
36  // arrLen -- Number of elements in the array "fvalues".
37  //
38  // h -- Step with which the scan of the reference density
39  // was performed.
40  //
41  // expectedAmise -- If this argument is provided, it will be filled
42  // with the expected AMISE value.
43  //
44  // The formulae used involve numerical evaluations of the derivatives
45  // of the scanned reference density. For a given filterDegree, the
46  // code needs to calculate the derivative of order filterDegree + 2.
47  // Make sure the scan step is chosen appropriately for this kind of
48  // calculation (see, for example, the "Numerical Recipes" book in order
49  // to understand the issues involved).
50  */
51  template<typename Real>
52  double amiseOptimalBwGauss(unsigned filterDegree, double npoints,
53  Real* fvalues, unsigned long arrLen,
54  Real h, double* expectedAmise = 0);
55 
56  /**
57  // Estimate optimal bandwidth for filters generated by the symmetric
58  // beta distribution used as the weight function. The "power" argument
59  // is the power of the symmetric beta, limited here to unsigned integers.
60  // Maximum possible power value is 10. The meaning of all other arguments
61  // is the same as in the function npstat::amiseOptimalBwGauss.
62  */
63  template<typename Real>
64  double amiseOptimalBwSymbeta(unsigned power, unsigned filterDegree,
65  double npoints,
66  Real* fvalues, unsigned long arrLen,
67  Real h, double* expectedAmise = 0);
68 
69  /**
70  // Plug-in bandwidth for filters generated by the Gaussian distribution
71  // used as the weight function
72  */
73  double amisePluginBwGauss(unsigned filterDegree, double npoints,
74  double sampleSigma, double* expectedAmise = 0);
75 
76  /**
77  // Approximate version of "amisePluginBwGauss" for use with non-integer
78  // filter degrees
79  */
80  double approxAmisePluginBwGauss(double filterDegree, double npoints,
81  double sampleSigma);
82 
83  /**
84  // Plug-in bandwidth for filters generated by the symmetric beta
85  // distribution used as the weight function. The "power" argument
86  // is the power of the symmetric beta, limited here to unsigned integers.
87  // Maximum possible power value is 10.
88  */
89  double amisePluginBwSymbeta(unsigned power, unsigned filterDegree,
90  double npoints, double sampleSigma,
91  double* expectedAmise = 0);
92 
93  /**
94  // Ratio of the symmetric beta kernel AMISE bandwidth to the Gaussian
95  // kernel AMISE bandwidth (as in the concept of "canonical bandwidth").
96  //
97  // 1.0 will be returned in case the "power" argument is negative.
98  */
99  double symbetaBandwidthRatio(int power, unsigned filterDegree);
100 
101  /**
102  // Approximate continuous version of "symbetaBandwidthRatio" for use with
103  // non-integer filter degrees (uses a polynomial fit to the original).
104  // Can be used in combination with "approxAmisePluginBwGauss" to derive
105  // a continuous version of symmetric beta bandwidth.
106  */
107  double approxSymbetaBandwidthRatio(int power, double filterDegree);
108 
109  /** AMISE-optimal filter degree for the Gaussian kernel */
110  unsigned amisePluginDegreeGauss(double npoints);
111 
112  /** AMISE-optimal filter degree for symmetric beta kernels */
113  unsigned amisePluginDegreeSymbeta(unsigned power, double npoints);
114 
115  /** Maximum filter degree supported by AMISE calculations */
117 
118  /**
119  // Integral of kernel squared for Gaussian (power < 0) and
120  // symmetric Beta kernels
121  */
123 
124  /**
125  // Integral of kernel squared for Gaussian (power < 0) and
126  // symmetric Beta kernels within some definite limits
127  */
128  double integralOfSymmetricBetaSquared(int power, double a, double b);
129 }
130 
131 #ifdef SWIG
132 
133 #include <cassert>
134 #include <vector>
135 #include <utility>
136 
137 namespace npstat {
138  inline std::pair<double, double>
139  amiseOptimalBwGauss2(unsigned filterDegree, double npoints,
140  const double* fvalues, unsigned long arrLen,
141  double h)
142  {
143  assert(fvalues);
144  assert(arrLen);
145 
146  std::vector<double> tmp(fvalues, fvalues+arrLen);
147  double expectedAmise;
148  const double bw = amiseOptimalBwGauss(filterDegree, npoints, &tmp[0],
149  arrLen, h, &expectedAmise);
150  return std::make_pair(bw, expectedAmise);
151  }
152 
153  inline std::pair<double, double>
154  amiseOptimalBwSymbeta2(unsigned power, unsigned filterDegree,
155  double npoints,
156  const double* fvalues, unsigned long arrLen,
157  double h)
158  {
159  assert(fvalues);
160  assert(arrLen);
161 
162  std::vector<double> tmp(fvalues, fvalues+arrLen);
163  double expectedAmise;
164  const double bw = amiseOptimalBwSymbeta(power, filterDegree,
165  npoints, &tmp[0],
166  arrLen, h, &expectedAmise);
167  return std::make_pair(bw, expectedAmise);
168  }
169 }
170 #endif // SWIG
171 
172 #include "npstat/stat/amiseOptimalBandwidth.icc"
173 
174 #endif // NPSTAT_AMISEOPTIMALBANDWIDTH_HH_
Definition: AbsArrayProjector.hh:14
double symbetaBandwidthRatio(int power, unsigned filterDegree)
double amisePluginBwGauss(unsigned filterDegree, double npoints, double sampleSigma, double *expectedAmise=0)
double amiseOptimalBwSymbeta(unsigned power, unsigned filterDegree, double npoints, Real *fvalues, unsigned long arrLen, Real h, double *expectedAmise=0)
unsigned amisePluginDegreeGauss(double npoints)
unsigned maxFilterDegreeSupported()
unsigned amisePluginDegreeSymbeta(unsigned power, double npoints)
double approxSymbetaBandwidthRatio(int power, double filterDegree)
double integralOfSymmetricBetaSquared(int power)
double amiseOptimalBwGauss(unsigned filterDegree, double npoints, Real *fvalues, unsigned long arrLen, Real h, double *expectedAmise=0)
double approxAmisePluginBwGauss(double filterDegree, double npoints, double sampleSigma)
double amisePluginBwSymbeta(unsigned power, unsigned filterDegree, double npoints, double sampleSigma, double *expectedAmise=0)