npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
UnfoldingBandwidthScanner1D.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_UNFOLDINGBANDWIDTHSCANNER1D_HH_
2 #define NPSTAT_UNFOLDINGBANDWIDTHSCANNER1D_HH_
3 
4 /*!
5 // \file UnfoldingBandwidthScanner1D.hh
6 //
7 // \brief Characterization of 1-d unfolding performance vs. filter bandwidth
8 //
9 // Author: I. Volobouev
10 //
11 // May 2014
12 */
13 
14 #include <map>
15 #include <string>
16 
18 #include "npstat/nm/Triple.hh"
19 
23 
24 namespace npstat {
25  namespace Private {
26  class UnfoldingBandwidthScanner1DHelper;
27  }
28 
29  class AbsBinnedComparison1D;
30 
32  {
33  public:
34  /**
35  // The constructor arguments are as follows:
36  //
37  // unfold -- An instance of AbsUnfold1D class.
38  //
39  // observed, lenObserved -- Observed data and its length. Can be
40  // modified later using "setObservedData"
41  // method.
42  //
43  // observationCovariance -- Covariance matrix for the observed
44  // values (can be NULL). If provided,
45  // this matrix should be symmetric and
46  // positive-definite. An internal copy
47  // will be made.
48  //
49  // oracle, lenOracle -- The "correct" unfolded distribution
50  // for use in various comparisons with
51  // unfolded results. If lenOracle == 0,
52  // this distribution will be considered
53  // unknown (in this case "oracle" pointer
54  // can be NULL), and the corresponding
55  // comparisons will not be made.
56  //
57  // symbetaPower -- This code will create smoothing filters
58  // maxDegree for the unfolded space using the
59  // xMinUnfolded function "symbetaLOrPEFilter1D".
60  // xMaxUnfolded These parameters will be passed to
61  // filterBoundaryMethod the "symbetaLOrPEFilter1D" call.
62  //
63  // nDoFCorrectionFactor -- The correction factor to use for
64  // determining the number of effective
65  // parameters in the fit. If this argument
66  // is positive, it will be multiplied
67  // by the number of parameters determined
68  // by the standard procedure (so set it to
69  // 1.0 in order not to apply any correction).
70  // If this argument is 0 or negative, the
71  // correction factor will be calculated as
72  // the fraction of bins filled in the observed
73  // data.
74  //
75  // foldedComparators -- The comparator objects to use in order
76  // to compare observed data with its fit.
77  //
78  // oracleComparators -- The comparator objects to use in order
79  // to compare unfolded data with the oracle
80  // (in case the oracle is actually provided).
81  */
83  AbsUnfold1D& unfold,
84  const double* observed, unsigned lenObserved,
85  const Matrix<double>* observationCovariance,
86  const double* oracle, unsigned lenOracle,
87  int symbetaPower, double maxDegree,
88  double xMinUnfolded, double xMaxUnfolded,
89  const BoundaryHandling& filterBoundaryMethod,
90  double nDoFCorrectionFactor,
91  const std::vector<const AbsBinnedComparison1D*>& foldedComparators,
92  const std::vector<const AbsBinnedComparison1D*>& oracleComparators);
93 
94  virtual ~UnfoldingBandwidthScanner1D();
95 
96  /**
97  // Names of the variables calculated by calling the "process" method.
98  // The meaning of the variables is described below. Variables which
99  // have * in front of their names will have meaningful values only
100  // in case the known "oracle" distribution was provided in the
101  // constructor.
102  //
103  // bandwidth -- Last bandwidth processed. Will be
104  // set to -1.0 if processing has failed
105  // and to -2.0 if "ntuplize" was called
106  // before the first call to "process".
107  // In case bandwidth is negative, values
108  // of all other variables are undefined
109  // (might simply remain unchanged from
110  // the previous "process" call). Always
111  // check that the bandwidth is
112  // non-negative before looking at other
113  // variables.
114  //
115  // foldedSum -- Sum of the Poisson means fitted to
116  // the observations.
117  //
118  // unfoldedSum -- Sum of the unfolded values.
119  //
120  // *smoothedOracleSum -- Sum of the smoothed oracle data.
121  //
122  // foldedLogli -- Poisson log-likelihood for the
123  // observations assuming that the
124  // Poisson means are given by the fit.
125  // If the covariance matrix of the
126  // observations was provided by the
127  // user, this variable will be set to
128  // -chi^2/2 instead.
129  //
130  // *unfoldedLogli -- Poisson log-likelihood for the
131  // unfolded distribution assuming that
132  // the Poisson means are given by the
133  // oracle data.
134  //
135  // *unfoldedISE -- Integrated squared error for the
136  // unfolded distribution w.r.t. the
137  // oracle (both distributions are
138  // normalized to 1). This variable is
139  // the easiest one to check in order
140  // to find out whether an oracle was
141  // provided (its value will be negative
142  // if it wasn't).
143  //
144  // *unfoldedDiagChisq -- Chi-squared of the unfolded
145  // distribution w.r.t. the oracle
146  // using only the diagonal elements
147  // of the unfolding covariance matrix.
148  //
149  // *smoothedUnfoldedLogli -- Poisson log-likelihood for the
150  // unfolded distribution assuming that
151  // the Poisson means are given by the
152  // smoothed oracle data.
153  //
154  // *smoothedUnfoldedISE -- Integrated squared error for the
155  // unfolded distribution w.r.t. the
156  // smoothed oracle (both distributions
157  // are normalized to 1).
158  //
159  // *smoothedUnfoldedDiagChisq -- Chi-squared of the unfolded
160  // distribution w.r.t. the smoothed
161  // oracle using only the diagonal
162  // elements of the covariance matrix.
163  //
164  // filterNDoFEntropic -- Effective number of degrees of
165  // filterNDoFTrace freedom for the smoothing matrix,
166  // (defined for S * S^T). "Entropic"
167  // means that the exponent of the
168  // eigenspectrum entropy is used to
169  // define effective NDoF, and "Trace"
170  // means that the ratio of the matrix
171  // trace to the largest eigenvalue is
172  // used.
173  //
174  // foldingNDoFEntropic -- Effective NDoF for the "folding"
175  // foldingNDoFTrace matrix F = K*S (defined for F * F^T.
176  // K is the response matrix here).
177  // F "folds" the unsmoothed distribution
178  // in the expectation-maximization
179  // iterations.
180  //
181  // unfoldedNDoFEntropic -- Effective NDoF for the unfolding
182  // unfoldedNDoFTrace covariance matrix.
183  //
184  // modelNDoFEntropic -- Effective NDoF for the matrix
185  // modelNDoFTrace H = K*E*(K*E)^T which plays a role
186  // similar to the hat matrix in
187  // regression. Here, E is the error
188  // propagation matrix for unfolding
189  // uniform observed distribution.
190  //
191  // AICcEntropic -- AIC (Akaike information criterion)
192  // AICcTrace with a correction for the finite
193  // sample size. Calculated using
194  // "foldedLogli" and corresponding
195  // "modelNDoF".
196  //
197  // smoothingNormfactor -- Normalization factor applied during
198  // the last smoothing procedure.
199  //
200  // integratedVariance -- Product of the covariance matrix
201  // trace and the bin width in the
202  // unfolded space.
203  //
204  // nIterations -- Number of iterations used to process
205  // this bandwidth (e.g., by the
206  // expectation-maximization method).
207  //
208  // unfoldingStatus -- Status returned by the "unfold" call
209  // of the unfolding object.
210  //
211  // foldedDistance_N -- With N = 0, 1, ..., up to
212  // foldedPValue_N foldedComparators.size() - 1.
213  // Distances and p-values between the
214  // observations and their fitted values,
215  // calculated by distribution comparators
216  // provided in the "foldedComparators"
217  // argument of the constructor.
218  //
219  // *oracleDistance_N -- With N = 0, 1, ..., up to
220  // *oraclePValue_N oracleComparators.size() - 1.
221  // Distances and p-values between oracle
222  // data and unfolded values, calculated
223  // by distribution comparators provided
224  // in the "oracleComparators" argument
225  // of the constructor.
226  //
227  // *smoothedOracleDistance_N -- With N = 0, 1, ..., up to
228  // *smoothedOraclePValue_N oracleComparators.size() - 1.
229  // Distances and p-values between
230  // smoothed oracle data and unfolded
231  // values, calculated by distribution
232  // comparators provided in the
233  // "oracleComparators" argument of
234  // the constructor.
235  */
236  virtual std::vector<std::string> variableNames() const;
237 
238  /**
239  // Number of variables calculated by the "process" method.
240  // See the description of "variableNames" method for details.
241  */
242  virtual unsigned variableCount() const;
243 
244  /**
245  // Write out produced variables into a common buffer.
246  // The order of the values will be consistent with the
247  // names returned by the "variableNames" method.
248  // The function returns the number of variables filled.
249  */
250  virtual unsigned ntuplize(double* buf, unsigned len) const;
251 
252  /**
253  // Perform unfolding with the given bandwidth. This method
254  // returns "true" on success and "false" on failure.
255  */
256  bool process(double bandwidth);
257 
258  /**
259  // Find the optimal bandwidth according to the AICc criterion and
260  // process that bandwidth. The arguments "bwmin", "bwmax", and
261  // "nsteps" specify the initial grid in the log space on which
262  // bandwidth search is performed ("nsteps" must be larger than 2).
263  // "startBw" is the bandwidth from which to start the search and
264  // "startingFactor" (>1.0) is an aproximate stepping factor for
265  // the beginning of the search. The argument "useEntropicNDoF"
266  // specifies whether the code should calculate effective NDoF
267  // for AICc using eigenspectrum entropy (if "true") or the trace
268  // (if "false") of the relevant matrix. Correspondingly, the code
269  // will minimize AICcEntropic or AICcTrace.
270  //
271  // As the intermediate results are memoized (filters, degrees of
272  // freedom, etc), it is best to call this function on different
273  // observed data with the same values of "bwmin", "bwmax", and
274  // "nsteps" arguments.
275  //
276  // This function returns the status of the search for the minimum AICc.
277  // The results of the "process" call for the corresponding bandwidth
278  // can be retrieved in the usual manner, using methods "ntuplize",
279  // "lastUnfoldingStatus", "lastBandwidth", "unfoldedResult", etc.
280  */
281  MinSearchStatus1D processAICcBandwidth(double bwmin, double bwmax,
282  unsigned nsteps, double startBw,
283  double startingFactor,
284  bool useEntropicNDoF = true);
285 
286  /** Last bandwidth processed */
287  inline double lastBandwidth() const {return bandwidth_;}
288 
289  /**
290  // Change the vector of observations. Array size must be compatible
291  // with that given in the constructor. The pointer to the covariance
292  // matrix of observations can be NULL. If provided, this matrix
293  // should be symmetric and positive-definite.
294  */
295  virtual void setObservedData(const double* observed, unsigned len,
296  const Matrix<double>* observCovariance);
297 
298  /**
299  // Set the bias (this is useful for various studies of uncertainties).
300  // If set, this bias will be added to the smoothed oracle data. The
301  // length must be compatible with the length of the unfolded result.
302  */
303  virtual void setBias(const double* unfoldingBias, unsigned len);
304 
305  /** Return the bias data provided by the last "setBias" call */
306  inline const std::vector<double>& getBias() const
307  {return biasData_;}
308 
309  /** Clear bias data */
310  inline virtual void clearBias() {biasData_.clear();}
311 
312  /** Length of the observed data vector */
313  inline unsigned observedSize() const {return observed_.size();}
314 
315  /** Length of the unfolded data vector */
316  inline unsigned unfoldedSize() const {return unfoldedVec_.size();}
317 
318  /** Status of the last unfolding call ("true" means success) */
319  inline virtual bool lastUnfoldingStatus() const
320  {return unfoldingStatus_;}
321 
322  /** Last unfolded distribution */
323  inline const std::vector<double>& unfoldedResult() const
324  {return unfoldedVec_;}
325 
326  /** Last unfolded covariance matrix */
327  inline const Matrix<double>& unfoldedCovariance() const
328  {return unfoldedCovmat_;}
329 
330  /** Response matrix */
331  inline const Matrix<double>& responseMatrix() const
332  {return unfold_.responseMatrix();}
333 
334  /**
335  // Oracle data. Empty vector is returned if the oracle
336  // was not provided in the constructor.
337  */
338  inline const std::vector<double>& getOracleData() const
339  {return oracle_;}
340 
341  /**
342  // Return oracle data smoothed with the last processed bandwidth.
343  // If the bias was set, it was added to this data.
344  */
345  inline const std::vector<double>& smoothedOracleData() const
346  {return smoothedOracleVec_;}
347 
348  /** Return eigenvector differences divided by sigma */
349  inline const std::vector<double>& eigenDeltas() const
350  {return eigenDeltas_;}
351 
352  /** Return covariance matrix eigenvalues (in the decreasing order) */
353  inline const std::vector<double>& covEigenValues() const
354  {return covEigenValues_;}
355 
356  /** Are we using convolutions with our filters? */
357  inline bool usingConvolutions() const
358  {return unfold_.usingConvolutions();}
359 
360  /** Set the initial approximation to the unfolded solution */
361  inline void setInitialApproximation(const double* approx,
362  unsigned lenApprox)
363  {unfold_.setInitialApproximation(approx, lenApprox);}
364 
365  /** Clear the initial approximation to the unfolded solution */
367  {unfold_.clearInitialApproximation();}
368 
369  /** Return the initial approximation to the unfolded solution */
370  inline const std::vector<double>& getInitialApproximation() const
371  {return unfold_.getInitialApproximation();}
372 
373  /** Correction factor for the number of degrees of freedom */
374  inline double nDoFCorrectionFactor() const {return nDoFCorr_;}
375 
376  /** Set the correction factor for the number of degrees of freedom */
377  inline void setNDoFCorrectionFactor(const double f) {nDoFCorr_ = f;}
378 
379  //@{
380  /** Function forwarded from AbsUnfold1D */
381  inline std::pair<double, double> smoothingNDoF() const
382  {return unfold_.smoothingNDoF();}
383  inline std::pair<double, double> responseNDoF() const
384  {return unfold_.responseNDoF();}
385  inline std::pair<double, double> smoothedResponseNDoF() const
386  {return unfold_.smoothedResponseNDoF();}
387  //@}
388 
389  /** Add a number of names to the given vector of strings */
390  static void addNamesWithPrefix(const char* prefix, unsigned count,
391  std::vector<std::string>* names);
392  protected:
393  // Return "true" on success
394  virtual bool performUnfolding();
395 
396  // Variables set by the constructor
397  AbsUnfold1D& unfold_;
398  std::vector<double> observed_;
399  std::vector<double> oracle_;
400  double maxLOrPEDegree_;
401  double xmin_;
402  double xmax_;
403  double binwidth_;
404  double nDoFCorr_;
405  double nObserved_;
406  double obsNonZeroFraction_;
407  BoundaryHandling bm_;
408  std::vector<const AbsBinnedComparison1D*> foldedComparators_;
409  std::vector<const AbsBinnedComparison1D*> oracleComparators_;
410  int symbetaPower_;
411  Matrix<double> observationCovariance_;
412  Matrix<double> observationErr_;
413 
414  // Bandwidth is set by the "process" method
415  double bandwidth_;
416 
417  // useEntropicNDoFinAICc from the "processAICcBandwidth" method
418  bool useEntropicNDoFinAICc_;
419 
420  // Unfolding results (calculated by the "performUnfolding" method)
421  std::vector<double> unfoldedVec_;
422  Matrix<double> unfoldedCovmat_;
423  std::vector<double> smoothedOracleVec_;
424  std::vector<double> eigenDeltas_;
425  std::vector<double> covEigenValues_;
426 
427  private:
428  friend class Private::UnfoldingBandwidthScanner1DHelper;
429 
430  // Disable copy constructor an assignment operator
433  UnfoldingBandwidthScanner1D& operator=(
435 
436  void smoothTheOracle(const double* oracle, double* smoothed) const;
437 
438  void getModelNDoF(double* ndof1, double* ndof2, double* ndof3, double bw);
439 
440  double getAICc(const double bw);
441 
442  double foldedLogLikelihood(const double* fitted,
443  const double* counts,
444  unsigned long len);
445 
446  static double getNonZeroFraction(const std::vector<double>&);
447 
448  // Bias
449  std::vector<double> biasData_;
450 
451  // Variables calculated by the "performUnfolding" method
452  std::vector<double> foldedDistances_;
453  std::vector<double> foldedPValues_;
454  std::vector<double> oracleDistances_;
455  std::vector<double> oraclePValues_;
456  std::vector<double> smoothedOracleDistances_;
457  std::vector<double> smoothedOraclePValues_;
458 
459  double foldedSum_;
460  double unfoldedSum_;
461  double smoothedOracleSum_;
462  double foldedLogli_;
463 
464  double unfoldedLogli_;
465  double unfoldedISE_;
466  double unfoldedDiagChisq_;
467 
468  double smoothedUnfoldedLogli_;
469  double smoothedUnfoldedISE_;
470  double smoothedUnfoldedDiagChisq_;
471 
472  double filterNDoFEntropic_;
473  double filterNDoFTrace_;
474  double foldingNDoFEntropic_;
475  double foldingNDoFTrace_;
476  double unfoldedNDoFEntropic_;
477  double unfoldedNDoFTrace_;
478  double modelNDoFEntropic_;
479  double modelNDoFTrace_;
480  double modelNDoF3_;
481 
482  double AICcEntropic_;
483  double AICcTrace_;
484  double AICc3_;
485 
486  double smoothingNormfactor_;
487  double integratedVariance_;
488  unsigned nIterations_;
489  bool unfoldingStatus_;
490 
491  // Variable set by "variableNames" call
492  mutable unsigned nVariables_;
493 
494  const LocalPolyFilter1D* oldFilter_;
495 
496  MemoizingSymbetaFilterProvider filterProvider_;
497  std::map<double,Triple<double,double,double> > ndofMap_;
498  CPP11_shared_ptr<const LocalPolyFilter1D> lastFilter_;
499 
500  std::vector<double> memBuf_, deltaBuf_;
501  };
502 }
503 
504 #endif // NPSTAT_UNFOLDINGBANDWIDTHSCANNER1D_HH_
Interface definition for 1-d unfolding algorithms.
API for LOrPE boundary handling methods.
Builds symmetric beta LOrPE filters and remembers these filters when the user sets the corresponding ...
Summary status of a search for a function minimum of a 1-d interval.
A simple analogue of std::pair with three components instead of two.
Definition: AbsUnfold1D.hh:29
bool usingConvolutions() const
Definition: AbsUnfold1D.hh:92
virtual void setInitialApproximation(const double *approx, unsigned lenApprox)
virtual void clearInitialApproximation()
std::pair< double, double > smoothingNDoF() const
virtual const std::vector< double > & getInitialApproximation() const
Definition: BoundaryHandling.hh:21
Definition: LocalPolyFilter1D.hh:38
Definition: MemoizingSymbetaFilterProvider.hh:27
Definition: UnfoldingBandwidthScanner1D.hh:32
const std::vector< double > & getInitialApproximation() const
Definition: UnfoldingBandwidthScanner1D.hh:370
bool process(double bandwidth)
virtual void setObservedData(const double *observed, unsigned len, const Matrix< double > *observCovariance)
virtual void clearBias()
Definition: UnfoldingBandwidthScanner1D.hh:310
virtual void setBias(const double *unfoldingBias, unsigned len)
unsigned observedSize() const
Definition: UnfoldingBandwidthScanner1D.hh:313
const Matrix< double > & responseMatrix() const
Definition: UnfoldingBandwidthScanner1D.hh:331
virtual unsigned ntuplize(double *buf, unsigned len) const
const std::vector< double > & smoothedOracleData() const
Definition: UnfoldingBandwidthScanner1D.hh:345
bool usingConvolutions() const
Definition: UnfoldingBandwidthScanner1D.hh:357
double lastBandwidth() const
Definition: UnfoldingBandwidthScanner1D.hh:287
const std::vector< double > & getBias() const
Definition: UnfoldingBandwidthScanner1D.hh:306
const Matrix< double > & unfoldedCovariance() const
Definition: UnfoldingBandwidthScanner1D.hh:327
void setInitialApproximation(const double *approx, unsigned lenApprox)
Definition: UnfoldingBandwidthScanner1D.hh:361
virtual bool lastUnfoldingStatus() const
Definition: UnfoldingBandwidthScanner1D.hh:319
unsigned unfoldedSize() const
Definition: UnfoldingBandwidthScanner1D.hh:316
static void addNamesWithPrefix(const char *prefix, unsigned count, std::vector< std::string > *names)
virtual unsigned variableCount() const
void setNDoFCorrectionFactor(const double f)
Definition: UnfoldingBandwidthScanner1D.hh:377
std::pair< double, double > smoothingNDoF() const
Definition: UnfoldingBandwidthScanner1D.hh:381
void clearInitialApproximation()
Definition: UnfoldingBandwidthScanner1D.hh:366
const std::vector< double > & unfoldedResult() const
Definition: UnfoldingBandwidthScanner1D.hh:323
const std::vector< double > & covEigenValues() const
Definition: UnfoldingBandwidthScanner1D.hh:353
const std::vector< double > & eigenDeltas() const
Definition: UnfoldingBandwidthScanner1D.hh:349
MinSearchStatus1D processAICcBandwidth(double bwmin, double bwmax, unsigned nsteps, double startBw, double startingFactor, bool useEntropicNDoF=true)
UnfoldingBandwidthScanner1D(AbsUnfold1D &unfold, const double *observed, unsigned lenObserved, const Matrix< double > *observationCovariance, const double *oracle, unsigned lenOracle, int symbetaPower, double maxDegree, double xMinUnfolded, double xMaxUnfolded, const BoundaryHandling &filterBoundaryMethod, double nDoFCorrectionFactor, const std::vector< const AbsBinnedComparison1D * > &foldedComparators, const std::vector< const AbsBinnedComparison1D * > &oracleComparators)
double nDoFCorrectionFactor() const
Definition: UnfoldingBandwidthScanner1D.hh:374
const std::vector< double > & getOracleData() const
Definition: UnfoldingBandwidthScanner1D.hh:338
virtual std::vector< std::string > variableNames() const
Definition: AbsArrayProjector.hh:14