npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
LocalPolyFilter1D.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_LOCALPOLYFILTER1D_HH_
2 #define NPSTAT_LOCALPOLYFILTER1D_HH_
3 
4 /*!
5 // \file LocalPolyFilter1D.hh
6 //
7 // \brief Local polynomial filtering (regression) on 1-d equidistant grids
8 //
9 // This technique generalizes the Savitzky-Golay smoothing filter to
10 // arbitrary weight functions (not just a flat weight which leads to
11 // Legendre polynomials).
12 //
13 // Author: I. Volobouev
14 //
15 // November 2009
16 */
17 
18 #include "geners/CPP11_auto_ptr.hh"
19 #include "geners/ClassId.hh"
20 
21 #include "npstat/nm/Matrix.hh"
22 
26 
27 #ifdef SWIG
28 #include <cassert>
29 #include <stdexcept>
30 
31 #include "npstat/wrap/arrayNDToNumpy.hh"
32 #include "npstat/wrap/numpyArrayUtils.hh"
33 #endif // SWIG
34 
35 namespace npstat {
36  /** This class performs local polynomial filtering in one dimension */
38  {
39  public:
40  /**
41  // Main constructor. The arguments are as follows:
42  //
43  // taper -- Damping factors for each polynomial degree
44  // (starting with the 0th order term). This can be
45  // NULL in which case it is assumed that all
46  // factors are 1.
47  //
48  // maxDegree -- Maximum degree of the polynomials. The length
49  // of the "taper" array (if not NULL) must be equal
50  // to maxDegree + 1. Note that, far away from the
51  // boundaries (where the situation is symmetric)
52  // the same filter will be produced using the same
53  // taper with an even degree N and with an odd degree
54  // N+1. Near the boundaries the filter coefficients
55  // will, of course, differ in these two cases.
56  //
57  // filterBuilder -- An instance of a class which actually builds
58  // the filters when requested. This builder
59  // is used only inside the constructor.
60  //
61  // dataLen -- The length of the data arrays which will be
62  // used with this filter (this info is needed in
63  // order to take into account the boundary effects).
64  */
65  LocalPolyFilter1D(const double* taper, unsigned maxDegree,
66  const AbsFilter1DBuilder& filterBuilder,
67  unsigned dataLen);
68 
70  LocalPolyFilter1D& operator=(const LocalPolyFilter1D&);
71 
72  virtual ~LocalPolyFilter1D();
73 
74  bool operator==(const LocalPolyFilter1D& r) const;
75  inline bool operator!=(const LocalPolyFilter1D& r) const
76  {return !(*this == r);}
77 
78  //@{
79  /** Inspect object properties */
80  double taper(unsigned degree) const;
81  inline unsigned maxDegree() const {return maxDegree_;}
82  inline unsigned dataLen() const {return nbins_;}
83  inline const std::vector<double>& getBandwidthFactors() const
84  {return bandwidthFactors_;}
85  //@}
86 
87  /** Self contribution needed for cross-validation */
88  double selfContribution(unsigned binNumber) const;
89 
90  /** Get the filter coefficients for the given bin */
91  const PolyFilter1D& getFilter(unsigned binNumber) const;
92 
93  /**
94  // This method performs the filtering. "dataLen",
95  // which is the length of both "in" and "out" arrays,
96  // must be the same as the one in the constructor.
97  */
98  template <typename Tin, typename Tout>
99  void filter(const Tin* in, unsigned dataLen, Tout* out) const;
100 
101  /**
102  // A diffent filtering method in which the shapes of the
103  // kernels are determined by the positions of the "sources"
104  // (i.e., sample points) instead of the positions at which
105  // the density (or response) is estimated.
106  */
107  template <typename Tin, typename Tout>
108  void convolve(const Tin* in, unsigned dataLen, Tout* out) const;
109 
110  /**
111  // Generate the complete (non-sparse) representation of the filter.
112  // It will be a generalized stochastic matrix (each row sums to 1).
113  */
115 
116  /**
117  // Generate a doubly stochastic filter out of this one.
118  // Such filters are useful for sequential copula smoothing.
119  // NULL pointer will be returned in case the requested
120  // margin tolerance is positive and can not be reached
121  // within the number of iterations allowed.
122  */
123  CPP11_auto_ptr<LocalPolyFilter1D> doublyStochasticFilter(
124  double tolerance, unsigned maxIterations) const;
125 
126  /** An experimental filter with an adjusted eigenspectrum */
127  CPP11_auto_ptr<LocalPolyFilter1D> eigenGroomedFilter() const;
128 
129  // Methods needed for I/O
130  inline virtual gs::ClassId classId() const {return gs::ClassId(*this);}
131  virtual bool write(std::ostream& os) const;
132 
133  static inline const char* classname()
134  {return "npstat::LocalPolyFilter1D";}
135  static inline unsigned version() {return 2;}
136  static LocalPolyFilter1D* read(const gs::ClassId& id, std::istream& in);
137 
138  private:
140 
141  mutable std::vector<long double> sumBuffer_;
142  std::vector<PolyFilter1D*> unique_;
143  double* taper_;
144  PolyFilter1D** bins_;
145  unsigned nbins_;
146  unsigned maxDegree_;
147  std::vector<double> bandwidthFactors_;
148 
149  template <typename T>
150  double convolute(const T* data, const PolyFilter1D* filter) const;
151 
152  void addWeightedFilter(long double w, unsigned binNum) const;
153 
154  void clearSumBuffer() const;
155 
156  void releaseMem();
157  void copyOtherData(const LocalPolyFilter1D&);
158 
159  // The following function will assume that is was
160  // given a matrix filled with zeros as an argument
161  void fillFilterMatrix(Matrix<double>* fm) const;
162 
163 #ifdef SWIG
164  public:
165  inline LocalPolyFilter1D* doublyStochasticFilter_2(
166  double tolerance, unsigned maxIterations) const
167  {
168  CPP11_auto_ptr<LocalPolyFilter1D> f = doublyStochasticFilter(
169  tolerance, maxIterations);
170  return f.release();
171  }
172 
173  inline LocalPolyFilter1D* eigenGroomedFilter_2() const
174  {
175  CPP11_auto_ptr<LocalPolyFilter1D> f = eigenGroomedFilter();
176  return f.release();
177  }
178 
179  inline PyObject* filter_2(const double* in, unsigned dataLen,
180  const bool makeNonNegative = false) const
181  {
182  if (dataLen != nbins_) throw std::invalid_argument(
183  "In npstat::LocalPolyFilter1D::filter_2: "
184  "incompatible data length");
185  assert(in);
186 
187  const int typenum = NumpyTypecode<double>::code;
188  npy_intp sh = dataLen;
189  PyObject* xarr = PyArray_SimpleNew(1, &sh, typenum);
190  if (xarr)
191  {
192  double* out = (double*)PyArray_DATA((PyArrayObject*)xarr);
193  try
194  {
195  this->filter(in, dataLen, out);
196  }
197  catch (const std::exception& e)
198  {
199  Py_DECREF(xarr);
200  throw e;
201  }
202  if (makeNonNegative)
203  for (unsigned i=0; i<dataLen; ++i)
204  if (out[i] < 0.0)
205  out[i] = 0.0;
206  }
207  return xarr;
208  }
209 
210  inline PyObject* convolve_2(const double* in, unsigned dataLen,
211  const bool makeNonNegative = false) const
212  {
213  if (dataLen != nbins_) throw std::invalid_argument(
214  "In npstat::LocalPolyFilter1D::convolve_2: "
215  "incompatible data length");
216  assert(in);
217 
218  const int typenum = NumpyTypecode<double>::code;
219  npy_intp sh = dataLen;
220  PyObject* xarr = PyArray_SimpleNew(1, &sh, typenum);
221  if (xarr)
222  {
223  double* out = (double*)PyArray_DATA((PyArrayObject*)xarr);
224  try
225  {
226  this->convolve(in, dataLen, out);
227  }
228  catch (const std::exception& e)
229  {
230  Py_DECREF(xarr);
231  throw e;
232  }
233  if (makeNonNegative)
234  for (unsigned i=0; i<dataLen; ++i)
235  if (out[i] < 0.0)
236  out[i] = 0.0;
237  }
238  return xarr;
239  }
240 
241  inline void filterIntoBuffer(const double* in, unsigned dataLen,
242  double* out, unsigned outLen,
243  const bool makeNonNegative = false) const
244  {
245  if (dataLen != outLen) throw std::invalid_argument(
246  "In npstat::LocalPolyFilter1D::filterIntoBuffer: "
247  "incompatible array sizes");
248  this->filter(in, dataLen, out);
249  if (makeNonNegative)
250  for (unsigned i=0; i<dataLen; ++i)
251  if (out[i] < 0.0)
252  out[i] = 0.0;
253  }
254 
255  inline void convolveIntoBuffer(const double* in, unsigned dataLen,
256  double* out, unsigned outLen,
257  const bool makeNonNegative = false) const
258  {
259  if (dataLen != outLen) throw std::invalid_argument(
260  "In npstat::LocalPolyFilter1D::convolveIntoBuffer: incompatible array sizes");
261  this->convolve(in, dataLen, out);
262  if (makeNonNegative)
263  for (unsigned i=0; i<dataLen; ++i)
264  if (out[i] < 0.0)
265  out[i] = 0.0;
266  }
267 #endif // SWIG
268  };
269 
270  /**
271  // This class can be used in places where LocalPolyFilter1D is expected
272  // but filtering is not needed due to some reason. Calling "filter" or
273  // "convolve" methods of this class will transfer inputs to outputs
274  // unmodified. The data length still must be correct.
275  */
277  {
278  public:
279  explicit DummyLocalPolyFilter1D(unsigned dataLen);
280 
281  inline virtual ~DummyLocalPolyFilter1D() {}
282 
283  // Methods related to I/O
284  inline virtual gs::ClassId classId() const {return gs::ClassId(*this);}
285  virtual bool write(std::ostream& os) const;
286 
287  static inline const char* classname()
288  {return "npstat::DummyLocalPolyFilter1D";}
289  static inline unsigned version() {return 1;}
290  static DummyLocalPolyFilter1D* read(const gs::ClassId& id,
291  std::istream& in);
292  };
293 
294  /**
295  // The utility function that generates the most common filters using
296  // kernels from the symmetric beta family. The arguments are as follows:
297  //
298  // m -- Choose the kernel from the symmetric beta family
299  // proportional to (1 - x^2)^m. If m is negative,
300  // Gaussian kernel will be used, truncated at +- 12 sigma.
301  //
302  // bandwidth -- Kernel bandwidth.
303  //
304  // maxDegree -- Degree of the LOrPE polynomial. Interpretation of
305  // non-integer arguments is by the "continuousDegreeTaper"
306  // function.
307  //
308  // numberOfGridPoints -- Length of the data array to be used with this
309  // filter (typically, number of histogram bins
310  // and such).
311  //
312  // gridCellSize -- Cell size or histogram bin width.
313  //
314  // boundaryMethod -- Method for handling the weight function at the
315  // boundary of the density support region.
316  //
317  // exclusionMask -- If provided, array with numberOfGridPoints elements.
318  // If an element of this array is not 0, corresponding
319  // data point will be excluded from the filtering process.
320  //
321  // excludeCentralPoint -- If "true", the weight will be set to 0 for
322  // the central bin of the filter. This can be
323  // useful in some cross validation scenarios.
324  */
325  CPP11_auto_ptr<LocalPolyFilter1D> symbetaLOrPEFilter1D(
326  int m, double bandwidth, double maxDegree,
327  unsigned numberOfGridPoints, double gridCellSize,
328  const BoundaryHandling& boundaryMethod,
329  const unsigned char* exclusionMask = 0,
330  bool excludeCentralPoint = false);
331 
332  CPP11_auto_ptr<LocalPolyFilter1D> symbetaLOrPEFilter1D(const AllSymbetaParams1D& p);
333 
334  /*
335  // Weight generated by the symmetric beta function at 0 (or by the
336  // Gaussian in case m < 0). Useful in certain cross validation scenarios.
337  */
338  double symbetaWeightAt0(int m, double bandwidth);
339 
340 #ifdef SWIG
341  LocalPolyFilter1D* symbetaLOrPEFilter1D_2(
342  int m, double bandwidth, double maxDegree,
343  unsigned numberOfGridPoints, double gridCellSize,
344  const BoundaryHandling& boundaryMethod,
345  const std::vector<int>* exclusionMask = 0,
346  bool excludeCentralPoint = false);
347 
348  inline LocalPolyFilter1D* symbetaLOrPEFilter1D_2(const AllSymbetaParams1D& p)
349  {
350  CPP11_auto_ptr<LocalPolyFilter1D> ptr = symbetaLOrPEFilter1D(p);
351  return ptr.release();
352  }
353 #endif // SWIG
354 }
355 
356 #include "npstat/stat/LocalPolyFilter1D.icc"
357 
358 #endif // NPSTAT_LOCALPOLYFILTER1D_HH_
Abstract interface for building local polynomial filter weights in 1-d.
Interface definition for 1-d smoothers useable with cross-validation.
Complete set of parameters for building 1-d filters from the symmetric beta family.
Template matrix class.
Definition: AllSymbetaParams1D.hh:22
Definition: BoundaryHandling.hh:21
Definition: LocalPolyFilter1D.hh:277
Definition: LocalPolyFilter1D.hh:38
const PolyFilter1D & getFilter(unsigned binNumber) const
Matrix< double > getFilterMatrix() const
void convolve(const Tin *in, unsigned dataLen, Tout *out) const
CPP11_auto_ptr< LocalPolyFilter1D > eigenGroomedFilter() const
LocalPolyFilter1D(const double *taper, unsigned maxDegree, const AbsFilter1DBuilder &filterBuilder, unsigned dataLen)
void filter(const Tin *in, unsigned dataLen, Tout *out) const
double taper(unsigned degree) const
unsigned dataLen() const
Definition: LocalPolyFilter1D.hh:82
double selfContribution(unsigned binNumber) const
CPP11_auto_ptr< LocalPolyFilter1D > doublyStochasticFilter(double tolerance, unsigned maxIterations) const
Definition: AbsFilter1DBuilder.hh:34
Definition: AbsArrayProjector.hh:14
CPP11_auto_ptr< LocalPolyFilter1D > symbetaLOrPEFilter1D(int m, double bandwidth, double maxDegree, unsigned numberOfGridPoints, double gridCellSize, const BoundaryHandling &boundaryMethod, const unsigned char *exclusionMask=0, bool excludeCentralPoint=false)
Definition: AbsFilter1DBuilder.hh:71
Definition: AbsPolyFilter1D.hh:27