npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
MinuitSemiparametricFitFcn1D.hh
Go to the documentation of this file.
1 #ifndef NPSI_MINUITSEMIPARAMETRICFITFCN1D_HH_
2 #define NPSI_MINUITSEMIPARAMETRICFITFCN1D_HH_
3 
4 /*!
5 // \file MinuitSemiparametricFitFcn1D.hh
6 //
7 // \brief Fit of a density by maximum likelihood using parametric signal
8 // model and nonparametric background
9 //
10 // Author: I. Volobouev
11 //
12 // October 2013
13 */
14 
15 #include <cmath>
16 #include <cfloat>
17 #include <iostream>
18 
20 
21 #include "Minuit2/MnUserParameters.h"
22 #include "Minuit2/MnMinimize.h"
23 #include "Minuit2/FunctionMinimum.h"
24 
25 namespace npsi {
26  /**
27  // Target minimization function adapter class for running maximum
28  // likelihood density fits to histogrammed data by Minuit2 in which
29  // background is estimated by LOrPE and signal by a parametric model.
30  //
31  // "Numeric" is the type of fitted histogram contents.
32  //
33  // "DensityConstructor" is a functor which creates the necessary
34  // signal density function out of a vector of parameters.
35  // Must have "operator()(const std::vector<double>&) const"
36  // which returns an object (or a reference) of some class which
37  // was derived from AbsDistribution1D. "ScalableDensityConstructor1D"
38  // is an example of such a constructor class, appropriate in case
39  // only the signal fraction, location and scale are to be fitted
40  // but not the signal shape.
41  //
42  // "NumIn" is the type of the array elements used to provide initial
43  // guess for the background density.
44  //
45  // Note the special order of Minuit parameters which must be used
46  // with this class. See the comments to "operator()" for more detail.
47  */
48  template <typename Numeric, class DensityConstructor, typename NumIn=double>
49  class MinuitSemiparametricFitFcn1D : public ROOT::Minuit2::FCNBase
50  {
51  public:
52  /**
53  // The constructor arguments are as follows:
54  //
55  // histo -- Naturally, the histogram to fit. It is assumed that the
56  // histogram bins are not scaled and contain the actual
57  // unweighted event counts.
58  //
59  // densityMaker -- This object will generate AbsDistribution1D objects
60  // for the signal model from the vector of signal parameters.
61  //
62  // fb -- "Filter builder". This object will generate local
63  // polynomial filters using densities from the symmetric
64  // beta family as weights. A reference implementation
65  // of such a class is provided with the NPStat software:
66  // "SimpleSymbetaFilterProvider". It may be possible to
67  // construct more efficient implementations which will
68  // reuse filters constructed for different bandwidth values.
69  //
70  // bm -- Method for handling LOrPE weight function at the
71  // boundaries of density support region.
72  //
73  // symmetricBetaPower -- This parameter determines the choice of
74  // the kernel. If it is negative, Gaussian kernel will
75  // be used, truncated at +- 12 sigma. If it is positive
76  // or 0, the kernel from the symmetric beta family will
77  // be used, proportional to (1 - x^2)^symmetricBetaPower.
78  // In my experience, most useful values of this parameter
79  // are -1 and 4.
80  //
81  // minimumBgFeatureSize -- Minimum feature size (like sigma of the
82  // Gaussian) of the background distribution that the
83  // nonparametric component of the fit should attempt to
84  // represent in full detail. Together with other things
85  // such as the number of events, this parameter will affect
86  // the smallest LOrPE bandwidth considered in the fit.
87  //
88  // initialApproximation -- Initial approximation for the background
89  // density (one array element per input histogram bin).
90  // Can be specified as NULL in which case the uniform
91  // density will be used as the initial approximation.
92  //
93  // lenApproximation -- Length of the initial approximation array.
94  // If not 0 (and if initialApproximation is not NULL),
95  // the code will verify that it equals the number of
96  // histogram bins.
97  //
98  // polyDegreeLimit -- The maximum polynomial degree to consider
99  // during cross validation (will be used as a limiting
100  // value of the corresponding MIGRAD parameter). Due to the
101  // round-off errors in calculating orthogonal polynomials,
102  // this number should not exceed 20 or so. The default
103  // value of -1 (and any other negative number) means that
104  // a reasonable limit estimate will be made automatically.
105  //
106  // verbose -- If "true", some info will be printed on the standard
107  // output while the fit is in progress.
108  //
109  // cvmode -- Mode for cross validation calculations. See comments
110  // to the "lorpeBackground1D" function for more detail
111  // on the meaning of this argument.
112  //
113  // useLeastSquaresCV -- "true" means use least squares cross
114  // validation. "false" means use pseudo likelihood.
115  //
116  // refitBandwidth -- If "true", the code will refit the bandwidth
117  // of the background model by cross validation for every
118  // value of signal parameters. In this case the user should
119  // fix the bandwidth parameter in the fit (it will be used
120  // as the initial guess only).
121  //
122  // refitPolyDegree -- If "true", the code will refit the polynomial
123  // degree of the background model by cross validation
124  // for every value of signal parameters. In this case
125  // the user should fix the polynomial degree parameter
126  // in the fit (it will be used as the initial guess only).
127  //
128  // regularizationParameter -- If this parameter is non-negative,
129  // the code will attempt to figure out the minimum
130  // reasonable value of the density estimate in cases
131  // that density is estimated to be zero at some point where
132  // data is present. This minimum density will be inversely
133  // proportional to pow(N, regularizationParameter).
134  // This will be applied during the likelihood calculation
135  // and in cross validation (limited to pseudo-likelihood
136  // CV, not done for least squares CV).
137  //
138  // nIntegrationPoints -- How many points to use in order to integrate
139  // the parametric signal density across each histogram bin.
140  // If this argument is specified as 0 then the difference
141  // of cumulative densities at the bin edges will be used,
142  // otherwise Gauss-Legendre quadrature will be employed
143  // (so that the number of points must be either 1 or one
144  // of the numbers supported by the "GaussLegendreQuadrature"
145  // class). For properly implemented densities, 0 should be
146  // the best option.
147  //
148  // bandwidthUpperLimitFactor -- This parameter determines the maximum
149  // kernel bandwidth which will be considered during cross
150  // validation. For Gaussian kernels, the maximum sigma
151  // will be set to the width of the histogram times this
152  // factor. For other kernels the maximum width will be
153  // adjusted accordingly (technically, the additional
154  // adjustment factor is determined by the "canonical
155  // bandwidth" ratio evaluated for the initial degree
156  // of the model polynomial).
157  //
158  // pseudoFitCycles -- Sometimes, when the initial guess of the
159  // bandwidth and polynomial degree is far away from
160  // optimal, Minuit has problems navigating towards the
161  // minimum in this 2-d space (the "narrow valley" problem).
162  // In this case it may be helful to make a number of
163  // steps at the beginning optimizing each variable in turn
164  // while keeping the other variable fixed. This parameter
165  // determines how many pairs of such initial steps to make
166  // during cross validation.
167  //
168  // convergenceEpsilon -- The background density is refitted
169  // iteratively by the relevant algorithm until convergence
170  // is achieved. The convergence criterion is that the L1
171  // distance between the background distributions obtained
172  // in two successive iterations is less than this number.
173  // Must be non-negative. A reasonable value will be
174  // selected automatically in case default value of
175  // 0.0 is given.
176  //
177  // pseudoFitPrecision -- The Minuit working precision for maximizing
178  // the cross validation pseudo likelihood. Should be
179  // higher than the convergenceEpsilon by about an order
180  // of magnitude or so. Note that you will need to adjust
181  // precision of _this_ fit as well. A reasonable value of
182  // "pseudoFitPrecision" will be selected automatically
183  // in case default value of 0.0 is given.
184  //
185  // bandwidthScanPoints -- If Minuit fails to optimize the quantity
186  // used in cross validation, the optimum will be found by
187  // simply scanning the bandwidth and the polynomial degree.
188  // This parameter determines the number of bandwidth points
189  // to scan.
190  //
191  // minlog -- This parameter limits the contribution of non-empty
192  // histogram bins into the log-likelihood in case the
193  // density was estimated to be 0 for that bin.
194  //
195  // up -- The Minuit "up" parameter which affects the definition
196  // of uncertainties. See the Minuit manual for details.
197  //
198  // upRefit -- The "up" parameter to use in the cross validation step.
199  //
200  // maxIterations -- The hard limit on the number of iterations that
201  // can be used for background determination. If convergence
202  // is not achieved after this number of iterations,
203  // an exception will be thrown.
204  //
205  // This class will not assume ownership of any references. However,
206  // an internal copy will be made of the "initialApproximation" array
207  // if this array is not NULL.
208  */
210  const DensityConstructor& densityMaker,
212  const npstat::BoundaryHandling& bm,
213  const int symmetricBetaPower,
214  const double minimumBgFeatureSize,
215  const NumIn* initialApproximation = 0,
216  const unsigned lenApproximation = 0U,
217  const int polyDegreeLimit = -1,
218  const bool verbose = true,
219  const unsigned cvmode =
220  npstat::CV_MODE_LINEARIZED,
221  const bool useLeastSquaresCV = true,
222  const bool refitBandwidth = true,
223  const bool refitPolyDegree = true,
224  const double regularizationParameter = -1.0,
225  const unsigned nIntegrationPoints = 0U,
226  const double bandwidthUpperLimitFactor=5.0,
227  const unsigned pseudoFitCycles = 0U,
228  const double convergenceEpsilon = 0.0,
229  const double pseudoFitPrecision = 0.0,
230  const unsigned bandwidthScanPoints = 64U,
231  const double minlog = log(DBL_MIN),
232  const double up = 0.5,
233  const double upRefit = 0.05,
234  const unsigned maxIterations = 10000U)
235  : signalDensityBuffer_(histo.nBins(), 0.0),
236  bgDensityBuffer_(histo.nBins(), 0.0),
237  histo_(histo),
238  fbuilder_(fb),
239  densityMaker_(densityMaker),
240  minimumBgFeatureSize_(minimumBgFeatureSize),
241  convergenceEpsilon_(convergenceEpsilon),
242  pseudoFitPrecision_(pseudoFitPrecision),
243  bandwidth_(0.0),
244  maxDegree_(-1.0),
245  signalFraction_(0.0),
246  minlog_(minlog),
247  up_(up),
248  upNonparametric_(upRefit),
249  regularizationParameter_(regularizationParameter),
250  bandwidthLimitFactor_(bandwidthUpperLimitFactor),
251  maxBgEventsInWindow_(-1.0),
252  callCount_(0),
253  nIntegrationPoints_(nIntegrationPoints),
254  maxIterations_(maxIterations),
255  nFitCycles_(pseudoFitCycles),
256  polyDegreeLimit_(polyDegreeLimit),
257  cvmode_(cvmode),
258  bandwidthScanPoints_(bandwidthScanPoints),
259  m_(symmetricBetaPower),
260  numRegularized_(0U),
261  bm_(bm),
262  refitBandwidth_(refitBandwidth),
263  refitPolyDegree_(refitPolyDegree),
264  verbose_(verbose),
265  useLeastSquaresCV_(useLeastSquaresCV)
266  {
267  if (histo_.dim() != 1U) throw std::invalid_argument(
268  "In npsi::MinuitSemiparametricFitFcn1D constructor : "
269  "only 1-d histograms can be used with this class");
270 
271  if (initialApproximation && lenApproximation)
272  {
273  initialApproximation_.reserve(lenApproximation);
274  for (unsigned i=0; i<lenApproximation; ++i)
275  initialApproximation_.push_back(initialApproximation[i]);
276  }
277 
278  if (convergenceEpsilon_ == 0.0)
279  convergenceEpsilon_ = 10.0*DBL_EPSILON*histo.nBins();
280  if (pseudoFitPrecision_ == 0.0)
281  pseudoFitPrecision_ = 10.0*convergenceEpsilon_;
282 
283  if (polyDegreeLimit >= 0)
284  if (static_cast<unsigned>(polyDegreeLimit) >
286  throw std::invalid_argument(
287  "In npsi::MinuitSemiparametricFitFcn1D constructor: "
288  "polynomial degree limit outside of supported range");
289 
290  if (minimumBgFeatureSize_ < 0.0)
291  throw std::invalid_argument(
292  "In npsi::MinuitSemiparametricFitFcn1D constructor: "
293  "minimum feature size can not be negative");
294 
295  nEvents_ = histo_.binContents().template sum<long double>();
296  }
297 
298  inline virtual ~MinuitSemiparametricFitFcn1D() {}
299 
300  inline double Up() const {return up_;}
301 
302  inline double lastBandwidthUsed() const {return bandwidth_;}
303 
304  inline double lastPolyDegreeUsed() const {return maxDegree_;}
305 
306  /**
307  // The number of bins for which the background density was
308  // actually adjusted during the most recent run
309  */
310  inline unsigned lastRegularized() const {return numRegularized_;}
311 
312  inline double lastSignalFractionUsed() const {return signalFraction_;}
313 
314  inline const std::vector<double>& lastBackgroundFitted() const
315  {return bgDensityBuffer_;}
316 
317  inline const std::vector<double>& lastSignalFitted() const
318  {return signalDensityBuffer_;}
319 
320  inline unsigned long callCount() const {return callCount_;}
321 
322  inline double crossValidationPrecision() const
323  {return pseudoFitPrecision_;}
324 
325  inline int symbetaPower() const {return m_;}
326 
327  inline double maxBgEventsInGaussWindow() const
328  {return maxBgEventsInWindow_;}
329 
330  inline npstat::AbsSymbetaFilterProvider& getFilterProvider() const
331  {return fbuilder_;}
332 
333  inline std::vector<double> lastDensityFitted() const
334  {
335  const double bgfrac = 1.0 - signalFraction_;
336  const unsigned nbins = histo_.nBins();
337  std::vector<double> dvec(nbins);
338  for (unsigned i=0; i<nbins; ++i)
339  dvec[i] = signalFraction_*signalDensityBuffer_[i] +
340  bgfrac*bgDensityBuffer_[i];
341  return dvec;
342  }
343 
344  /**
345  // This method returns the negative log likelihood.
346  //
347  // Parameters must be given in a specific order:
348  //
349  // x[0] -- Value of the LOrPE bandwidth. Should be fixed
350  // in the fit if "refitBandwidth" was set "true"
351  // in the constructor.
352  //
353  // x[1] -- Degree of the LOrPE polynomial. Should be fixed
354  // in the fit if "refitPolyDegree" was set "true"
355  // in the constructor.
356  //
357  // x[2] -- Signal fraction. Can be either fixed or fitted.
358  //
359  // x[3...] -- These parameters will be passed to the
360  // density maker for building the signal density.
361  */
362  virtual double operator()(const std::vector<double>& x) const
363  {
364  const unsigned npara = x.size();
365  if (npara < 3U) throw std::invalid_argument(
366  "In npsi::MinuitSemiparametricFitFcn1D::operator(): "
367  "insufficient number of parameters, need at least 3");
368 
369  bandwidth_ = x[0];
370  maxDegree_ = x[1];
371  signalFraction_ = x[2];
372 
373  if (!refitBandwidth_ && bandwidth_ <= 0.0)
374  throw std::invalid_argument(
375  "In npsi::MinuitSemiparametricFitFcn1D::operator(): "
376  "if not refitted, the bandwidth must be positive");
377 
378  if (!refitPolyDegree_ && maxDegree_ < 0.0)
379  throw std::invalid_argument(
380  "In npsi::MinuitSemiparametricFitFcn1D::operator(): "
381  "if not refitted, polynomial degree can not be negative");
382 
383  if (signalParameters_.size() != npara - 3U)
384  signalParameters_.resize(npara - 3U);
385  for (unsigned i=3U; i<npara; ++i)
386  signalParameters_[i - 3U] = x[i];
387  const npstat::AbsDistribution1D& signal =
388  densityMaker_(signalParameters_);
389 
390  const unsigned lenApprox = initialApproximation_.size();
391  const NumIn* initApprox = 0;
392  if (lenApprox)
393  initApprox = &initialApproximation_[0];
394 
395  // Estimate the max number of bg events in a window
396  // of relevant size. We will later use this number
397  // for things like determining the maximum degree
398  // of the LOrPE polynomial.
399  //
400  // Note that this calculation is performed only once,
401  // using the initial signal settings. If we redo this
402  // calculation every time signal parameters change,
403  // memoizing the filters becomes hopeless.
404  //
405  double winBgEvents = 1.0;
406  if (refitBandwidth_)
407  {
408  if (maxBgEventsInWindow_ <= 0.0)
409  {
410  maxBgEventsInWindow_ = 1.0;
411  const double oneSigmaCoverage = 0.682689492137085897;
412  const double nbg2 = npstat::maxBgPointsInWindow1D(
413  histo_, signal, signalFraction_, nIntegrationPoints_,
414  2.0*minimumBgFeatureSize_, initApprox, lenApprox,
415  workspace_)/oneSigmaCoverage;
416  if (nbg2 > maxBgEventsInWindow_)
417  maxBgEventsInWindow_ = nbg2;
418  }
419  winBgEvents = maxBgEventsInWindow_;
420  }
421 
422  if (refitBandwidth_ || refitPolyDegree_)
423  {
424  // Figure out optimal LOrPE parameters by cross validation
425  const unsigned beVerboseFor = 3U;
426  const bool verboseRefit = verbose_ && callCount_ < beVerboseFor;
427  if (verbose_ && beVerboseFor && callCount_ == beVerboseFor)
428  std::cout << "**** MinuitLOrPEBgCVFcn1D appears to work "
429  << "fine and will shut up now" << std::endl;
430 
432  histo_, fbuilder_, signal, signalFraction_,
433  nIntegrationPoints_, initApprox, lenApprox,
434  m_, minimumBgFeatureSize_, winBgEvents,
435  convergenceEpsilon_, maxIterations_,
436  bm_, useLeastSquaresCV_,
437  verboseRefit, !refitBandwidth_, cvmode_,
438  regularizationParameter_, minlog_, upNonparametric_);
439 
440  // If the initial values of bandwidth and/or poly degree
441  // are negative (also 0 in case of the bandwidth) we must
442  // find a good initial approximation by scanning
443  const bool scanBandwidth = refitBandwidth_ && bandwidth_ <= 0.0;
444  const bool scanPolyDeg = refitPolyDegree_ && maxDegree_ < 0.0;
445  double bwstep = 0.0, degstep = 0.0;
446 
447  if (scanBandwidth && scanPolyDeg)
448  scanBandwidthAndDegree(bgFcn, bandwidthScanPoints_,
449  winBgEvents, &bandwidth_, &bwstep,
450  &maxDegree_, &degstep);
451  else if (scanBandwidth)
452  scanBandwidthOnly(bgFcn, bandwidthScanPoints_,
453  winBgEvents, maxDegree_,
454  &bandwidth_, &bwstep);
455  else if (scanPolyDeg)
456  scanPolyDegOnly(bgFcn, winBgEvents,
457  bandwidth_, &maxDegree_, &degstep);
458 
459  if (!refitBandwidthAndDegree(bgFcn, nFitCycles_, winBgEvents,
460  !refitBandwidth_, !refitPolyDegree_,
461  &bandwidth_, bwstep,
462  &maxDegree_, degstep))
463  {
464  // The Minuit fit has to converge if we are looking
465  // into just 1-d optimization. If it doesn't converge,
466  // something must be really wrong with the problem setup.
467  if (!(refitBandwidth_ && refitPolyDegree_))
468  throw std::runtime_error(
469  "In npsi::MinuitSemiparametricFitFcn1D::operator()"
470  ": Minuit2 failed to optimize cross-validation "
471  "bandwidth or poly degree");
472 
473  if (!(scanBandwidth && scanPolyDeg))
474  {
475  // Try to determine a better starting point by
476  // scanning the bandwidth and the degree in wide
477  // ranges of possible values
478  scanBandwidthAndDegree(bgFcn, bandwidthScanPoints_,
479  winBgEvents,
480  &bandwidth_, &bwstep,
481  &maxDegree_, &degstep);
482 
483  // Refit again using the new starting point.
484  // If this fit fails again then just leave
485  // the bandwidth and degree parameters at their
486  // values determined by the scan.
487  refitBandwidthAndDegree(bgFcn, nFitCycles_, winBgEvents,
488  !refitBandwidth_,
489  !refitPolyDegree_,
490  &bandwidth_, bwstep,
491  &maxDegree_, degstep);
492  }
493  }
494  }
495 
496  const unsigned niter = npstat::lorpeBackground1D(
497  histo_, fbuilder_, bm_, signal, signalFraction_,
498  nIntegrationPoints_, initApprox, lenApprox, m_,
499  bandwidth_, maxDegree_,
500  convergenceEpsilon_, maxIterations_,
501  &signalDensityBuffer_[0], signalDensityBuffer_.size(),
502  &bgDensityBuffer_[0], bgDensityBuffer_.size(), workspace_);
503 
504  if (niter >= maxIterations_) throw std::runtime_error(
505  "In npsi::MinuitSemiparametricFitFcn1D::operator(): "
506  "background determination algorithm failed to converge");
507 
508  if (regularizationParameter_ >= 0.0)
509  {
510  const double nBg = nEvents_*(1.0 - signalFraction_);
511  const double minBgDensity1 = npstat::symbetaWeightAt0(
512  m_, bandwidth_)/nBg/std::pow(nBg, regularizationParameter_);
513  numRegularized_ = npstat::lorpeRegularizeBgDensity1D(
514  histo_, signalFraction_,
515  &signalDensityBuffer_[0], signalDensityBuffer_.size(),
516  &bgDensityBuffer_[0], bgDensityBuffer_.size(),
517  minBgDensity1);
518  }
519 
520  const double logli = npstat::lorpeBgLogli1D(
521  histo_, signalFraction_,
522  &signalDensityBuffer_[0], signalDensityBuffer_.size(),
523  &bgDensityBuffer_[0], bgDensityBuffer_.size(), minlog_);
524 
525  if (verbose_)
526  {
527  std::cout << "C " << callCount_
528  << ", bw " << bandwidth_
529  << ", deg " << maxDegree_
530  << ", sf " << signalFraction_;
532  dynamic_cast<const npstat::AbsScalableDistribution1D*>(
533  &signal);
534  if (s)
535  std::cout << ", m " << s->location()
536  << ", s " << s->scale();
537  std::cout << ", ll " << logli;
538  if (regularizationParameter_ >= 0.0)
539  std::cout << ", adj " << numRegularized_;
540  std::cout << '\n';
541  std::cout.flush();
542  }
543 
544  ++callCount_;
545  return -logli;
546  }
547 
548  protected:
549  // Get the maximum bandwidth according to the limit provided
550  // in the constructor
551  inline double estimateMaxBandwidth() const
552  {
553  const double bwfactor = npstat::symbetaBandwidthRatio(m_, 0);
554  const npstat::HistoAxis& axis = histo_.axis(0);
555  return bwfactor*(axis.max() - axis.min())*bandwidthLimitFactor_;
556  }
557 
558  //
559  // Get a reasonable estimate for the maximum degree of the LOrPE
560  // filter from the plugin bandwidth theory
561  //
562  inline unsigned maxFilterDegree1D(const double nbg) const
563  {
564  if (polyDegreeLimit_ >= 0)
565  return polyDegreeLimit_;
566 
567  unsigned lim = m_ >= 0 ? npstat::amisePluginDegreeSymbeta(m_,nbg) :
569  lim += (lim/2U + 1U);
570  if (lim < 3U)
571  lim = 3U;
572  else if (lim > npstat::maxFilterDegreeSupported())
574  return lim;
575  }
576 
577  private:
578  void scanPolyDegOnly(const MinuitLOrPEBgCVFcn1D<Numeric,NumIn>& bgFcn,
579  const double effBgEvents, const double bw,
580  double* bestDegree, double* stepSize) const
581  {
582  const unsigned nIntervalsOnUnit = 4U;
583 
584  assert(bestDegree);
585  assert(stepSize);
586  assert(bw > 0.0);
587 
588  const unsigned maxDeg = maxFilterDegree1D(effBgEvents);
589  const double degstep = 1.0/nIntervalsOnUnit;
590 
591  std::vector<double> arg(2);
592  arg[0] = bw;
593 
594  bgFcn.getFilterProvider().startMemoizing();
595 
596  double minFCN = DBL_MAX, bestdeg = 0.0;
597  for (unsigned ideg=0U; ideg<=maxDeg; ++ideg)
598  {
599  const unsigned nInt = ideg == maxDeg ? 1U : nIntervalsOnUnit;
600  for (unsigned iint=0U; iint<nInt; ++iint)
601  {
602  arg[1] = iint*degstep + ideg;
603  const double f = bgFcn(arg);
604  if (f < minFCN)
605  {
606  minFCN = f;
607  bestdeg = arg[1];
608  }
609  }
610  }
611 
612  bgFcn.getFilterProvider().stopMemoizing();
613 
614  *bestDegree = bestdeg;
615  *stepSize = degstep;
616  }
617 
618  void scanBandwidthOnly(
619  const MinuitLOrPEBgCVFcn1D<Numeric,NumIn>& bgFcn,
620  const unsigned bwScanPoints, const double effBgEvents,
621  const double maxdeg, double* bestBandwidth, double* logStep) const
622  {
623  assert(bwScanPoints > 1U);
624  assert(maxdeg >= 0.0);
625  assert(bestBandwidth);
626  assert(logStep);
627 
628  const double maxRatio = estimateMaxBandwidth()/
629  minHistoBandwidth1D(histo_, minimumBgFeatureSize_,
630  0.0, effBgEvents, m_);
631  const double logstep = log(maxRatio)/(bwScanPoints - 1U);
632  std::vector<double> arg(2);
633  arg[1] = maxdeg;
634 
635  bgFcn.getFilterProvider().startMemoizing();
636 
637  double minFCN = DBL_MAX, bestbw = 0.0;
638  for (unsigned ibw=0U; ibw<bwScanPoints; ++ibw)
639  {
640  arg[0] = ibw*logstep;
641  const double f = bgFcn(arg);
642  if (f < minFCN)
643  {
644  minFCN = f;
645  bestbw = arg[0];
646  }
647  }
648 
649  bgFcn.getFilterProvider().stopMemoizing();
650 
651  *bestBandwidth = bgFcn.getActualBandwidth(bestbw, maxdeg);
652  *logStep = logstep;
653  }
654 
655  void scanBandwidthAndDegree(
656  const MinuitLOrPEBgCVFcn1D<Numeric,NumIn>& bgFcn,
657  const unsigned bwScanPoints, const double effBgEvents,
658  double* p_bandwidth, double* p_bwstep,
659  double* p_maxDegree, double* p_degstep) const
660  {
661  const unsigned nIntervalsOnUnit = 4U;
662 
663  assert(p_bandwidth);
664  assert(p_bwstep);
665  assert(p_maxDegree);
666  assert(p_degstep);
667  assert(bwScanPoints > 1U);
668  assert(refitBandwidth_);
669 
670  const unsigned maxDeg = maxFilterDegree1D(effBgEvents);
671  const double degstep = 1.0/nIntervalsOnUnit;
672  const double maxRatio = estimateMaxBandwidth()/
673  minHistoBandwidth1D(histo_, minimumBgFeatureSize_,
674  0.0, effBgEvents, m_);
675  const double logstep = log(maxRatio)/(bwScanPoints - 1U);
676  std::vector<double> arg(2);
677 
678  bgFcn.getFilterProvider().startMemoizing();
679 
680  double minFCN = DBL_MAX;
681  for (unsigned ideg=0U; ideg<=maxDeg; ++ideg)
682  {
683  const unsigned nInt = ideg == maxDeg ? 1U : nIntervalsOnUnit;
684  for (unsigned iint=0U; iint<nInt; ++iint)
685  {
686  arg[1] = iint*degstep + ideg;
687  for (unsigned ibw=0U; ibw<bwScanPoints; ++ibw)
688  {
689  arg[0] = ibw*logstep;
690  const double f = bgFcn(arg);
691  if (f < minFCN)
692  {
693  minFCN = f;
694  *p_bandwidth = arg[0];
695  *p_maxDegree = arg[1];
696  }
697  }
698  }
699  }
700 
701  bgFcn.getFilterProvider().stopMemoizing();
702 
703  *p_bandwidth = bgFcn.getActualBandwidth(*p_bandwidth, *p_maxDegree);
704  *p_bwstep = logstep;
705  *p_degstep = degstep;
706  }
707 
708  // The following function must return "true" on success
709  // and "false" on failure. *p_bandwidth and *p_maxDegree
710  // will be used both as initial approximations and to
711  // hold the output.
712  //
713  bool refitBandwidthAndDegree(
714  const MinuitLOrPEBgCVFcn1D<Numeric,NumIn>& bgFcn,
715  const unsigned ncycles, const double effectiveBgEvents,
716  const bool fixBandwidth, const bool fixPolyDegree,
717  double* p_bandwidth, const double inBwstep,
718  double* p_maxDegree, const double inDegstep) const
719  {
720  assert(p_bandwidth);
721  assert(p_maxDegree);
722 
723  double bw = *p_bandwidth;
724  double deg = *p_maxDegree;
725 
726  // Bring the initial polynomial degree within limits
727  const double degreeLimit = maxFilterDegree1D(effectiveBgEvents);
728  if (!fixPolyDegree)
729  {
730  if (deg < 0.0)
731  deg = 0.0;
732  if (deg > degreeLimit)
733  deg = degreeLimit;
734  }
735 
736  // Calculate the bandwidth limits
737  const double minBandwidth = minHistoBandwidth1D(
738  histo_, minimumBgFeatureSize_, deg, effectiveBgEvents, m_);
739  const double maxRatio = estimateMaxBandwidth()/
740  minHistoBandwidth1D(histo_, minimumBgFeatureSize_,
741  0.0, effectiveBgEvents, m_);
742 
743  // Bring the initial bandwidth within limits
744  if (!fixBandwidth)
745  {
746  if (maxRatio <= 1.0)
747  throw std::invalid_argument(
748  "In npsi::MinuitSemiparametricFitFcn1D::"
749  "refitBandwidthAndDegree: minimum bandwidth is too "
750  "large (or maximum is too small)");
751 
752  const double maxBandwidth = minBandwidth*maxRatio;
753  const double limFactor = 1.00001;
754 
755  if (bw < minBandwidth*limFactor)
756  bw = minBandwidth*limFactor;
757  if (bw > maxBandwidth/limFactor)
758  bw = maxBandwidth/limFactor;
759 
760  bw = log(bw/minBandwidth);
761  }
762 
763  // Try the cross validation fit several times
764  // with different precision
765  const double precision = crossValidationPrecision();
766  for (unsigned itry=0; itry<5U; ++itry)
767  {
768  const double errfactor = pow(1.4, itry);
769  const double precfactor = pow(10.0, itry);
770 
771  // Initialize Minuit parameters
772  ROOT::Minuit2::MnUserParameters upar;
773  if (fixBandwidth)
774  upar.Add("bandwidth", bw, 0.1);
775  else
776  {
777  const double bwstep = inBwstep > 0.0 ?
778  inBwstep : 0.1*errfactor;
779  upar.Add("bandwidth", bw, bwstep, 0.0, log(maxRatio));
780  }
781  if (fixPolyDegree)
782  upar.Add("degree", deg, 0.1);
783  else
784  {
785  const double degstep = inDegstep > 0.0 ?
786  inDegstep : 0.1*errfactor;
787  upar.Add("degree", deg, degstep, 0.0, degreeLimit);
788  }
789 
790  // Create the minimization engine
791  ROOT::Minuit2::MnMinimize mini(bgFcn, upar);
792  mini.SetPrecision(precision*precfactor);
793 
794  if (fixBandwidth)
795  mini.Fix(0U);
796  else if (fixPolyDegree)
797  mini.Fix(1U);
798  else
799  {
800  // Minuit is not good at fitting functions with strongly
801  // correlated parameters. Because of this, we might want
802  // to perform a few fix/release cycles.
803  for (unsigned icycle=0; icycle<ncycles; ++icycle)
804  {
805  mini.Fix(1U);
806  mini();
807  mini.Release(1U);
808 
809  mini.Fix(0U);
810  mini();
811  mini.Release(0U);
812  }
813  }
814 
815  ROOT::Minuit2::FunctionMinimum fit(mini());
816  if (fit.IsValid())
817  {
818  *p_maxDegree = mini.Value(1U);
819  if (fixBandwidth)
820  *p_bandwidth = mini.Value(0U);
821  else
822  *p_bandwidth = bgFcn.getActualBandwidth(mini.Value(0U),
823  *p_maxDegree);
824  return true;
825  }
826  }
827 
828  return false;
829  }
830 
831  mutable std::vector<double> signalParameters_;
832  mutable std::vector<double> signalDensityBuffer_;
833  mutable std::vector<double> bgDensityBuffer_;
834  mutable std::vector<double> workspace_;
835 
836  const npstat::HistoND<Numeric>& histo_;
838  const DensityConstructor& densityMaker_;
839  std::vector<NumIn> initialApproximation_;
840 
841  double minimumBgFeatureSize_;
842  double convergenceEpsilon_;
843  double pseudoFitPrecision_;
844  mutable double bandwidth_;
845  mutable double maxDegree_;
846  mutable double signalFraction_;
847  double minlog_;
848  double up_;
849  double upNonparametric_;
850  double regularizationParameter_;
851  double bandwidthLimitFactor_;
852  double nEvents_;
853  mutable double maxBgEventsInWindow_;
854 
855  mutable unsigned long callCount_;
856  unsigned nIntegrationPoints_;
857  unsigned maxIterations_;
858  unsigned nFitCycles_;
859  int polyDegreeLimit_;
860  unsigned cvmode_;
861  unsigned bandwidthScanPoints_;
862  int m_;
863  mutable unsigned numRegularized_;
864 
866 
867  bool refitBandwidth_;
868  bool refitPolyDegree_;
869  bool verbose_;
870  bool useLeastSquaresCV_;
871  };
872 }
873 
874 #endif // NPSI_MINUITSEMIPARAMETRICFITFCN1D_HH_
Fit of LOrPE bandwidth and maximum poly degree in the mixed signal/background model.
Definition: MinuitLOrPEBgCVFcn1D.hh:28
Definition: MinuitSemiparametricFitFcn1D.hh:50
virtual double operator()(const std::vector< double > &x) const
Definition: MinuitSemiparametricFitFcn1D.hh:362
MinuitSemiparametricFitFcn1D(const npstat::HistoND< Numeric > &histo, const DensityConstructor &densityMaker, npstat::AbsSymbetaFilterProvider &fb, const npstat::BoundaryHandling &bm, const int symmetricBetaPower, const double minimumBgFeatureSize, const NumIn *initialApproximation=0, const unsigned lenApproximation=0U, const int polyDegreeLimit=-1, const bool verbose=true, const unsigned cvmode=npstat::CV_MODE_LINEARIZED, const bool useLeastSquaresCV=true, const bool refitBandwidth=true, const bool refitPolyDegree=true, const double regularizationParameter=-1.0, const unsigned nIntegrationPoints=0U, const double bandwidthUpperLimitFactor=5.0, const unsigned pseudoFitCycles=0U, const double convergenceEpsilon=0.0, const double pseudoFitPrecision=0.0, const unsigned bandwidthScanPoints=64U, const double minlog=log(DBL_MIN), const double up=0.5, const double upRefit=0.05, const unsigned maxIterations=10000U)
Definition: MinuitSemiparametricFitFcn1D.hh:209
unsigned lastRegularized() const
Definition: MinuitSemiparametricFitFcn1D.hh:310
Definition: AbsDistribution1D.hh:165
double scale() const
Definition: AbsDistribution1D.hh:183
double location() const
Definition: AbsDistribution1D.hh:180
Definition: AbsSymbetaFilterProvider.hh:28
Definition: BoundaryHandling.hh:21
Definition: HistoAxis.hh:31
double min() const
Definition: HistoAxis.hh:42
Definition: HistoND.hh:46
unsigned long nBins() const
Definition: HistoND.hh:226
Definition: fitCompositeJohnson.hh:16
Definition: AbsArrayProjector.hh:14
double symbetaBandwidthRatio(int power, unsigned filterDegree)
unsigned amisePluginDegreeGauss(double npoints)
unsigned maxFilterDegreeSupported()
unsigned lorpeRegularizeBgDensity1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, NumOut *bgDensity, unsigned lenBgDensity, double minBgDensity1)
double maxBgPointsInWindow1D(const HistoND< Numeric > &histo, const AbsDistribution1D &signal, double signalFraction, unsigned nIntegrationPoints, double windowWidth, const NumIn *initialApproximation, unsigned lenApproximation, std::vector< double > &workspace)
unsigned amisePluginDegreeSymbeta(unsigned power, double npoints)
double lorpeBgLogli1D(const HistoND< Numeric > &histo, double signalFraction, const NumOut *signalDensity, unsigned lenSignalDensity, const NumOut *bgDensity, unsigned lenBgDensity, double minlog=log(DBL_MIN))
unsigned lorpeBackground1D(const HistoND< Numeric > &histo, AbsSymbetaFilterProvider &fbuilder, const BoundaryHandling &bm, const AbsDistribution1D &signal, double signalFraction, unsigned nIntegrationPoints, const NumIn *initialApproximation, unsigned lenApproximation, int m, double bandwidth, double maxDegree, double convergenceEpsilon, unsigned maxIterations, NumOut *signalDensity, unsigned lenSignalDensity, NumOut *bgDensity, unsigned lenBgDensity, std::vector< double > &workspace, NumOut *densityMinusOne=0, unsigned lenDensityMinusOne=0, unsigned cvmode=CV_MODE_LINEARIZED, double regularizationParameter=-1.0, double *lastDivergence=0)
Definition: AbsDistribution1D.hh:31