npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
AbsBandwidthCV.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_ABSBANDWIDTHCV_HH_
2 #define NPSTAT_ABSBANDWIDTHCV_HH_
3 
4 /*!
5 // \file AbsBandwidthCV.hh
6 //
7 // \brief Interface definitions for KDE or LOrPE cross-validation calculations
8 //
9 // Implementations of these interfaces should calculate the quantity
10 // which is to be maximized (pseudo log likelihood, -AMISE, etc.)
11 //
12 // Author: I. Volobouev
13 //
14 // September 2010
15 */
16 
17 #include <vector>
18 #include <utility>
19 
20 #include "npstat/stat/HistoND.hh"
21 
22 #ifdef SWIG
23 #include <stdexcept>
24 #include "npstat/wrap/arrayNDToNumpy.hh"
25 #include "npstat/wrap/numpyArrayUtils.hh"
26 #endif // SWIG
27 
28 namespace npstat {
29  // Forward declarations
30  struct AbsPolyFilter1D;
31  struct AbsPolyFilterND;
32 
33  /**
34  // Cross-validation for univariate densities.
35  // "Numeric" template parameter is the type of histogram bins.
36  // "Num2" is the type produced by the density estimation code
37  // (usually just double). "Num3" is the type of point coordinates
38  // used to fill the histogram (normally, float or double), and
39  // "Num4" is the type of point weights.
40  */
41  template<
42  typename Numeric,
43  typename Num2,
44  typename Num3 = double,
45  typename Num4 = double
46  >
48  {
49  typedef Numeric bin_type;
50  typedef Num2 density_type;
51  typedef Num3 coord_type;
52  typedef Num4 weight_type;
53 
54  inline virtual ~AbsBandwidthCV1D() {}
55 
56  /**
57  // It should be assumed that the "nFillsInRange" method of the
58  // argument histogram returns the actual number of fills (that is,
59  // the histogram represents an actual collection of points, has
60  // possible bin values of 0, 1, 2, ..., and it is not scaled).
61  //
62  // "densityEstimate" is allowed to be an estimate without truncation
63  // (even if it includes negative values). Dependence of the
64  // optimized quantity on the bandwidth should be smoother without
65  // truncation. Naturally, the order of values in "densityEstimate"
66  // is the same as the order of bins in "histo".
67  //
68  // The "filterUsed" parameter is needed in order to be able to
69  // do "leave-one-out" type of cross-validation. In such
70  // calculations one has to know how much of the density estimate
71  // at coordinate x is contributed by the source point at x. When
72  // density estimates are done on the grid, the filters differ for
73  // the center of the grid and on the border. Because of this, we
74  // will need to have an access to the complete filter collection.
75  */
76  virtual double operator()(
77  const HistoND<Numeric>& histo,
78  const Num2* densityEstimate, unsigned lenEstimate,
79  const AbsPolyFilter1D& filterUsed) const = 0;
80 
81  /** Cross-validation for samples of univariate weighted points */
82  virtual double operator()(
83  const HistoND<Numeric>& histo,
84  double effectiveSampleSize,
85  const Num2* densityEstimate, unsigned lenEstimate,
86  const AbsPolyFilter1D& filterUsed) const = 0;
87 
88  /**
89  // Cross-validation for samples of univariate weighted points
90  // which can be used when the original sample is available.
91  // For the sample points, the first element of the pair is
92  // the point coordinate and the second element is the weight.
93  // The histogram must be created in advance using that particular
94  // sample.
95  */
96  virtual double operator()(
97  const HistoND<Numeric>& histo,
98  const std::pair<Num3, Num4>* sample, unsigned long lenSample,
99  const Num2* densityEstimate, unsigned lenEstimate,
100  const AbsPolyFilter1D& filterUsed) const = 0;
101 
102  inline double operator()(
103  const HistoND<Numeric>& histo,
104  const std::vector<std::pair<Num3, Num4> >& sample,
105  const Num2* densityEstimate, unsigned lenEstimate,
106  const AbsPolyFilter1D& filterUsed) const
107  {
108  const std::pair<Num3, Num4>* ps = 0;
109  const unsigned long lenSample = sample.size();
110  if (lenSample)
111  ps = &sample[0];
112  return this->operator()(histo, ps, lenSample,
113  densityEstimate, lenEstimate, filterUsed);
114  }
115 
116 #ifdef SWIG
117  inline double cv(
118  const HistoND<Numeric,HistoAxis>& histo,
119  PyObject* input,
120  const AbsPolyFilter1D& filterUsed) const
121  {
122  const int typenum = numpyArrayType(input);
123  if (typenum != NumpyTypecode<double>::code)
124  throw std::invalid_argument("In npstat::AbsBandwidthCV1D::cv: "
125  "unsupported input array type");
126  const unsigned datalen = histo.nBins();
127  if (!isNumpyShapeCompatible(input, &datalen, 1U)) throw std::invalid_argument(
128  "In npstat::AbsBandwidthCV1D::cv: incompatible array shape");
129  const Num2* in = (double*)PyArray_DATA((PyArrayObject*)input);
130  return this->operator()(histo, in, datalen, filterUsed);
131  }
132 
133  inline double cvWeighted(
134  const HistoND<Numeric,HistoAxis>& histo,
135  const double effectiveSampleSize,
136  PyObject* input,
137  const AbsPolyFilter1D& filterUsed) const
138  {
139  const int typenum = numpyArrayType(input);
140  if (typenum != NumpyTypecode<double>::code)
141  throw std::invalid_argument("In npstat::AbsBandwidthCV1D::cvWeighted: "
142  "unsupported input array type");
143  const unsigned datalen = histo.nBins();
144  if (!isNumpyShapeCompatible(input, &datalen, 1U)) throw std::invalid_argument(
145  "In npstat::AbsBandwidthCV1D::cvWeighted: incompatible array shape");
146  const Num2* in = (double*)PyArray_DATA((PyArrayObject*)input);
147  return this->operator()(histo, effectiveSampleSize, in, datalen, filterUsed);
148  }
149 
150  inline double cvWeightedSample(
151  const HistoND<Numeric,HistoAxis>& histo,
152  const std::vector<std::pair<Num3, Num4> >& sample,
153  PyObject* input,
154  const AbsPolyFilter1D& filterUsed) const
155  {
156  const int typenum = numpyArrayType(input);
157  if (typenum != NumpyTypecode<double>::code)
158  throw std::invalid_argument("In npstat::AbsBandwidthCV1D::cvWeightedSample: "
159  "unsupported input array type");
160  const unsigned datalen = histo.nBins();
161  if (!isNumpyShapeCompatible(input, &datalen, 1U)) throw std::invalid_argument(
162  "In npstat::AbsBandwidthCV1D::cvWeightedSample: incompatible array shape");
163  const Num2* in = (double*)PyArray_DATA((PyArrayObject*)input);
164  return this->operator()(histo, sample, in, datalen, filterUsed);
165  }
166 #endif // SWIG
167  };
168 
169  /**
170  // Cross-validation for multivariate densities.
171  // "Numeric" template parameter is the type of histogram bins.
172  // "Array" template parameter should be one of ArrayND types.
173  */
174  template<typename Numeric, class Array>
176  {
177  typedef Numeric bin_type;
178  typedef Array density_type;
179 
180  inline virtual ~AbsBandwidthCVND() {}
181 
182  /**
183  // It should be assumed that the "nFillsInRange" method of the
184  // argument histogram returns the actual number of fills.
185  //
186  // "densityEstimate" is allowed to be an estimate without truncation
187  // (even if it includes negative values).
188  */
189  virtual double operator()(
190  const HistoND<Numeric>& histo,
191  const Array& densityEstimate,
192  const AbsPolyFilterND& filterUsed) const = 0;
193 
194  /** Cross-validation for samples of multivariate weighted points */
195  virtual double operator()(
196  const HistoND<Numeric>& histo,
197  double effectiveSampleSize,
198  const Array& densityEstimate,
199  const AbsPolyFilterND& filterUsed) const = 0;
200 
201 #ifdef SWIG
202  inline double cv(
203  const HistoND<Numeric,HistoAxis>& histo,
204  PyObject* input,
205  const AbsPolyFilterND& filterUsed) const
206  {
207  const int typenum = numpyArrayType(input);
208  if (typenum != NumpyTypecode<double>::code)
209  throw std::invalid_argument("In npstat::AbsBandwidthCVND::cv: "
210  "unsupported input array type");
211  const unsigned* shape = histo.binContents().shapeData();
212  if (!isNumpyShapeCompatible(input, shape, histo.dim()))
213  throw std::invalid_argument("In npstat::AbsBandwidthCVND::cv: "
214  "incompatible array shape");
215  double* in = (double*)PyArray_DATA((PyArrayObject*)input);
216  const ArrayND<double>& wrap = externalMemArrayND(in, shape, histo.dim());
217  return this->operator()(histo, wrap, filterUsed);
218  }
219 
220  inline double cvWeighted(
221  const HistoND<Numeric,HistoAxis>& histo,
222  const double effectiveSampleSize,
223  PyObject* input,
224  const AbsPolyFilterND& filterUsed) const
225  {
226  const int typenum = numpyArrayType(input);
227  if (typenum != NumpyTypecode<double>::code)
228  throw std::invalid_argument("In npstat::AbsBandwidthCVND::cvWeighted: "
229  "unsupported input array type");
230  const unsigned* shape = histo.binContents().shapeData();
231  if (!isNumpyShapeCompatible(input, shape, histo.dim()))
232  throw std::invalid_argument("In npstat::AbsBandwidthCVND::cvWeighted: "
233  "incompatible array shape");
234  double* in = (double*)PyArray_DATA((PyArrayObject*)input);
235  const ArrayND<double>& wrap = externalMemArrayND(in, shape, histo.dim());
236  return this->operator()(histo, effectiveSampleSize, wrap, filterUsed);
237  }
238 #endif // SWIG
239  };
240 }
241 
242 #endif // NPSTAT_ABSBANDWIDTHCV_HH_
Arbitrary-dimensional histogram template.
const unsigned * shapeData() const
Definition: ArrayND.hh:335
Definition: HistoND.hh:46
const ArrayND< Numeric > & binContents() const
Definition: HistoND.hh:197
unsigned long nBins() const
Definition: HistoND.hh:226
unsigned dim() const
Definition: HistoND.hh:187
Definition: AbsArrayProjector.hh:14
ArrayND< Numeric > externalMemArrayND(Numeric *data, const unsigned *shape, unsigned dim)
Definition: AbsBandwidthCV.hh:48
virtual double operator()(const HistoND< Numeric > &histo, const Num2 *densityEstimate, unsigned lenEstimate, const AbsPolyFilter1D &filterUsed) const =0
virtual double operator()(const HistoND< Numeric > &histo, double effectiveSampleSize, const Num2 *densityEstimate, unsigned lenEstimate, const AbsPolyFilter1D &filterUsed) const =0
virtual double operator()(const HistoND< Numeric > &histo, const std::pair< Num3, Num4 > *sample, unsigned long lenSample, const Num2 *densityEstimate, unsigned lenEstimate, const AbsPolyFilter1D &filterUsed) const =0
Definition: AbsBandwidthCV.hh:176
virtual double operator()(const HistoND< Numeric > &histo, const Array &densityEstimate, const AbsPolyFilterND &filterUsed) const =0
virtual double operator()(const HistoND< Numeric > &histo, double effectiveSampleSize, const Array &densityEstimate, const AbsPolyFilterND &filterUsed) const =0
Definition: AbsPolyFilter1D.hh:27
Definition: AbsPolyFilterND.hh:27