npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
lorpeBackground1D.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_LORPEBACKGROUND1D_HH_
2 #define NPSTAT_LORPEBACKGROUND1D_HH_
3 
4 /*!
5 // \file lorpeBackground1D.hh
6 //
7 // \brief A driver function for 1-d density estimation by LOrPE in a composite
8 // signal plus background model
9 //
10 // Author: I. Volobouev
11 //
12 // October 2013
13 */
14 
15 #include <cfloat>
16 
17 #include "npstat/stat/HistoND.hh"
20 
21 namespace npstat {
22  /** Possible modes for cross validation calculations */
23  enum {
24  CV_MODE_FAST = 0,
25  CV_MODE_MINUSBIN,
26  CV_MODE_MINUSONE,
27  CV_MODE_LINEARIZED,
28  N_CV_MODES
29  };
30 
31  /**
32  // A driver function for density estimation from histograms using LOrPE
33  // in a composite signal plus background model in which signal is
34  // represented by a parametric distribution. The function arguments are:
35  //
36  // histo -- Naturally, the histogram to fit. It is assumed that the
37  // histogram bins are not scaled and contain the actual
38  // unweighted event counts.
39  //
40  // fbuilder -- This object will generate local polynomial filters using
41  // densities from the symmetric beta family as weights.
42  // This argument is not a LocalPolyFilter1D already so that
43  // some memoization is allowed (this might be useful for
44  // speeding up the calculation).
45  //
46  // bm -- Boundary handling method.
47  //
48  // signal -- Distribution to use for modeling the signal. Will
49  // be internally renormalized to that it integrates
50  // to 1 on the histogram support interval.
51  //
52  // signalFraction -- Fraction of the signal in the sample. Must belong
53  // to the (-1, 1) interval.
54  //
55  // nIntegrationPoints -- How many points to use in order to integrate
56  // the parametric signal density across each bin. If this
57  // is specified as 0 then the difference of cumulative
58  // densities at the bin edges will be used, otherwise
59  // Gauss-Legendre quadrature will be employed (so that the
60  // number of points must be either 1 or one of the numbers
61  // supported by the "GaussLegendreQuadrature" class).
62  // For properly implemented densities, 0 should be the
63  // best option.
64  //
65  // initialApproximation -- Initial approximation for the background
66  // density (one array element per input histogram bin).
67  // Can be specified as NULL in which case the uniform
68  // density is used as the initial approximation.
69  //
70  // lenApproximation -- Length of the initial approximation array.
71  // Must be equal to the number of histogram bins.
72  //
73  // m -- Choose the kernel from the symmetric beta family
74  // proportional to (1 - x^2)^m. If m is negative,
75  // Gaussian kernel will be used, truncated at +- 12 sigma.
76  //
77  // bandwidth -- The bandwidth for the kernel used to generate the LOrPE
78  // polynomials.
79  //
80  // maxDegree -- Degree of the LOrPE polynomial. Interpretation of
81  // non-integer arguments is by the "continuousDegreeTaper"
82  // function.
83  //
84  // convergenceEpsilon -- We will postulate that the iterations have
85  // converged if the L1 distance between the background
86  // distributions obtained in two successive iterations
87  // is less than this number. Must be non-negative.
88  //
89  // maxIterations -- Maximum number of iterations allowed.
90  //
91  // signalDensity -- Buffer for storing the signal density integrated
92  // over histogram bins. This result will be normalized
93  // to 1 on the histogram support interval. Can be
94  // specified as NULL if not needed.
95  //
96  // lenSignalDensity -- Length of the "signalDensity" buffer.
97  //
98  // bgDensity -- Buffer for storing the background density estimate.
99  // This result will be normalized to 1 on the histogram
100  // support interval. Can be specified as NULL if not
101  // needed.
102  //
103  // lenBgDensity -- Length of the "bgDensity" buffer.
104  //
105  // workspace -- If this function is called many times on the same
106  // histogram, it is recommended to reuse the same
107  // workspace buffer. This will save a few memory
108  // allocation calls for various internal needs.
109  //
110  // densityMinusOne -- If provided, a buffer for storing the results
111  // in which the background density is estimated for every
112  // bin after removing one event from that bin (assuming that
113  // at least one event is present in that bin) or, depending
114  // on "cvmode", after removing the whole bin. Intended
115  // for subsequent use in cross validation. Note that
116  // requesting this calculation will slow the code down
117  // considerably. Strictly speaking, the returned numbers
118  // are calculated for non-empty bins only, they are not
119  // really a density, and they are intended purely for cross
120  // validation (there should be no attempt to normalize them).
121  //
122  // lenDensityMinusOne -- Length of the "densityMinusOne" buffer.
123  //
124  // cvmode -- If "densityMinusOne" array is provided, this parameter
125  // affects calculation of this density. For binned densities,
126  // this calculation can be performed in a number of ways
127  // which differ in their treatment of discretization effects.
128  // Possible modes are:
129  //
130  // CV_MODE_FAST -- Remove the bin and use the same
131  // global background approximation
132  // to construct the EDF weights for
133  // each such bin.
134  //
135  // CV_MODE_MINUSBIN -- Remove the bin and recalculate
136  // the background approximation
137  // without this bin by iterations.
138  //
139  // CV_MODE_MINUSONE -- Reduce the bin value by 1 and
140  // recalculate the background
141  // approximation by iterations.
142  //
143  // CV_MODE_LINEARIZED -- Faster, linearized version of
144  // CV_MODE_MINUSONE calculation
145  // (but might be less reliable).
146  //
147  // regularizationParameter -- If this parameter is non-negative, the code
148  // will attempt to figure out the minimum reasonable value
149  // of "densityMinusOne" if that density is estimated to be
150  // zero at some point which has data present. This minimum
151  // density will be inversely proportional to
152  // pow(N, regularizationParameter). This feature can be
153  // useful in pseudo-likelihood cross validation scenarios.
154  //
155  // lastDivergence -- If provided, *lastDivergence will be filled
156  // by the actual divergence between the two most recent
157  // iterations used to calculate the background density.
158  // The absolute value tells the difference. If the value
159  // is negative, this means that there were negative entries
160  // that were truncated to zero (in this case one should
161  // not expect very good convergence anyway).
162  //
163  // The function returns the iteration number for which convergence
164  // was established. If it is equal to "maxIterations" then the convergence
165  // target was not reached.
166  */
167  template<typename Numeric, typename NumIn, typename NumOut>
169  const HistoND<Numeric>& histo, AbsSymbetaFilterProvider& fbuilder,
170  const BoundaryHandling& bm,
171  const AbsDistribution1D& signal, double signalFraction,
172  unsigned nIntegrationPoints,
173  const NumIn* initialApproximation, unsigned lenApproximation,
174  int m, double bandwidth, double maxDegree,
175  double convergenceEpsilon, unsigned maxIterations,
176  NumOut* signalDensity, unsigned lenSignalDensity,
177  NumOut* bgDensity, unsigned lenBgDensity,
178  std::vector<double>& workspace,
179  NumOut* densityMinusOne = 0, unsigned lenDensityMinusOne = 0,
180  unsigned cvmode = CV_MODE_LINEARIZED,
181  double regularizationParameter = -1.0, double* lastDivergence = 0);
182 
183  /**
184  // Function that can calculate pseudo log-likelihood for cross validation
185  // using the output generated by "lorpeBackground1D". This function is
186  // using a subset of "lorpeBackground1D" arguments. It is assumed that
187  // "lorpeBackground1D" calculations have converged and that
188  // "densityMinusOne" was calculated. The "minlog" parameter limits
189  // the contribution into the log-likelihood from any single bin.
190  */
191  template<typename Numeric, typename NumOut>
193  const HistoND<Numeric>& histo, double signalFraction,
194  const NumOut* signalDensity, unsigned lenSignalDensity,
195  const NumOut* bgDensity, unsigned lenBgDensity,
196  const NumOut* densityMinusOne, unsigned lenDensityMinusOne,
197  double minlog = log(DBL_MIN));
198 
199  /**
200  // Function that can calculate the least squares cross validation quantity
201  // using the output generated by "lorpeBackground1D". This function is
202  // using a subset of "lorpeBackground1D" arguments. It is assumed that
203  // "lorpeBackground1D" calculations have converged and that
204  // "densityMinusOne" was calculated.
205  */
206  template<typename Numeric, typename NumOut>
208  const HistoND<Numeric>& histo, double signalFraction,
209  const NumOut* signalDensity, unsigned lenSignalDensity,
210  const NumOut* bgDensity, unsigned lenBgDensity,
211  const NumOut* densityMinusOne, unsigned lenDensityMinusOne);
212 
213  /**
214  // Function that can "regularize" the background density estimate
215  // generated by "lorpeBackground1D" for subsequent log-likelihood
216  // estimation. This procedure may be necessary in case there are
217  // data points at the locations in which the density was estimated
218  // to be 0 (or just too low).
219  //
220  // Parameter "minBgDensity1" specifies the minimum background-only
221  // density which is expected for any bin which has at least one
222  // background entry.
223  //
224  // The return value of the function tells in how many bins the
225  // background density had to be adjusted.
226  */
227  template<typename Numeric, typename NumOut>
229  const HistoND<Numeric>& histo, double signalFraction,
230  const NumOut* signalDensity, unsigned lenSignalDensity,
231  NumOut* bgDensity, unsigned lenBgDensity, double minBgDensity1);
232 
233  /**
234  // Function that can calculate log-likelihood (for maximizing likelihood)
235  // using the output generated by "lorpeBackground1D" (possibly, after
236  // processing by "lorpeRegularizeBgDensity"). This function is using
237  // a subset of "lorpeBackground1D" arguments. It is assumed that
238  // "lorpeBackground1D" calculations have converged. The "minlog" parameter
239  // limits the contribution into the log-likelihood from any single bin.
240  */
241  template<typename Numeric, typename NumOut>
243  const HistoND<Numeric>& histo, double signalFraction,
244  const NumOut* signalDensity, unsigned lenSignalDensity,
245  const NumOut* bgDensity, unsigned lenBgDensity,
246  double minlog = log(DBL_MIN));
247 
248  /**
249  // Estimate the maximum number of background points contained
250  // inside the sliding window with the width "windowWidth". All other
251  // parameters have the same meaning as in the "lorpeBackground1D"
252  // function.
253  */
254  template<typename Numeric, typename NumIn>
256  const HistoND<Numeric>& histo,
257  const AbsDistribution1D& signal, double signalFraction,
258  unsigned nIntegrationPoints, double windowWidth,
259  const NumIn* initialApproximation, unsigned lenApproximation,
260  std::vector<double>& workspace);
261 }
262 
263 #include "npstat/stat/lorpeBackground1D.icc"
264 
265 #endif // NPSTAT_LORPEBACKGROUND1D_HH_
Interface definition for 1-d continuous statistical distributions.
Interface definition for classes which build symmetric beta LOrPE filters.
Arbitrary-dimensional histogram template.
Definition: AbsSymbetaFilterProvider.hh:28
Definition: BoundaryHandling.hh:21
Definition: HistoND.hh:46
Definition: AbsArrayProjector.hh:14
unsigned lorpeRegularizeBgDensity1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, NumOut *bgDensity, unsigned lenBgDensity, double minBgDensity1)
double maxBgPointsInWindow1D(const HistoND< Numeric > &histo, const AbsDistribution1D &signal, double signalFraction, unsigned nIntegrationPoints, double windowWidth, const NumIn *initialApproximation, unsigned lenApproximation, std::vector< double > &workspace)
double lorpeBgCVLeastSquares1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, const NumOut *bgDensity, unsigned lenBgDensity, const NumOut *densityMinusOne, unsigned lenDensityMinusOne)
double lorpeBgCVPseudoLogli1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, const NumOut *bgDensity, unsigned lenBgDensity, const NumOut *densityMinusOne, unsigned lenDensityMinusOne, double minlog=log(DBL_MIN))
double lorpeBgLogli1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, const NumOut *bgDensity, unsigned lenBgDensity, double minlog=log(DBL_MIN))
unsigned lorpeBackground1D(const HistoND< Numeric > &histo, AbsSymbetaFilterProvider &fbuilder, const BoundaryHandling &bm, const AbsDistribution1D &signal, double signalFraction, unsigned nIntegrationPoints, const NumIn *initialApproximation, unsigned lenApproximation, int m, double bandwidth, double maxDegree, double convergenceEpsilon, unsigned maxIterations, NumOut *signalDensity, unsigned lenSignalDensity, NumOut *bgDensity, unsigned lenBgDensity, std::vector< double > &workspace, NumOut *densityMinusOne=0, unsigned lenDensityMinusOne=0, unsigned cvmode=CV_MODE_LINEARIZED, double regularizationParameter=-1.0, double *lastDivergence=0)
Definition: AbsDistribution1D.hh:31