npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
UnfoldingBandwidthScannerND.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_UNFOLDINGBANDWIDTHSCANNERND_HH_
2 #define NPSTAT_UNFOLDINGBANDWIDTHSCANNERND_HH_
3 
4 /*!
5 // \file UnfoldingBandwidthScannerND.hh
6 //
7 // \brief Characterization of multivariate unfolding performance vs. bandwidth
8 //
9 // Author: I. Volobouev
10 //
11 // June 2014
12 */
13 
14 #include <map>
15 
19 
20 namespace npstat {
22  {
23  public:
24  /**
25  // The constructor arguments are as follows:
26  //
27  // unfold -- An instance of AbsUnfoldND class.
28  //
29  // filterParameters -- Specifications how to build the
30  // smoothing filters for each dimension
31  // of the unfolded data. The corresponding
32  // values will be eventually passed to the
33  // "symbetaLOrPEFilter1D" call. The number
34  // of elements in this vector must be equal
35  // to the rank of the response matrix used
36  // by the "unfold" argument.
37  //
38  // observed -- Observed data. Can be modified later
39  // using "setObservedData" method. Array
40  // shape, however, can not change.
41  //
42  // nDoFCorrectionFactor -- The correction factor to use for
43  // determining the number of effective
44  // parameters in the fit. If this argument
45  // is positive, it will be multiplied
46  // by the number of parameters determined
47  // by the standard procedure (so set it to
48  // 1.0 in order not to apply any correction).
49  // If this argument is 0 or negative, the
50  // correction factor will be calculated as
51  // the fraction of bins filled in the observed
52  // data.
53  //
54  // observationCovariance -- Covariance matrix for the observed
55  // values. If not NULL, this matrix should
56  // be symmetric and positive-definite.
57  // An internal copy will be made.
58  //
59  // oracle -- The "correct" unfolded distribution
60  // for use in various comparisons with
61  // unfolded results. If oracle is NULL,
62  // this distribution will be considered
63  // unknown, and the corresponding
64  // comparisons will not be made.
65  */
67  AbsUnfoldND& unfold,
68  const std::vector<SymbetaParams1D>& filterParameters,
69  const ArrayND<double>& observed,
70  double nDoFCorrectionFactor,
71  const Matrix<double>* observationCovariance = 0,
72  const ArrayND<double>* oracle = 0);
73 
74  virtual ~UnfoldingBandwidthScannerND();
75 
76  /** Dimensionality of the unfolded space */
77  inline unsigned unfoldedDim() const
78  {return filterParameters_.size();}
79 
80  /** Dimensionality of the observed space */
81  inline unsigned observedDim() const
82  {return unfold_.responseMatrix().observedShape().size();}
83 
84  /**
85  // Names of the variables calculated by calling the "process" method.
86  // The meaning of the variables is described below. Variables which
87  // have * in front of their names will have meaningful values only
88  // in case the known "oracle" distribution was provided in the
89  // constructor.
90  //
91  // bandwidth_N -- With N = 0, 1, ..., up to
92  // filterParameters.size() - 1.
93  // Last bandwidth set processed. All
94  // these values will be set to -2.0 if
95  // "ntuplize" was called before the
96  // first call to "process".
97  //
98  // foldedSum -- Sum of the Poisson means fitted to
99  // the observations.
100  //
101  // unfoldedSum -- Sum of the unfolded values.
102  //
103  // *smoothedOracleSum -- Sum of the smoothed oracle data.
104  //
105  // foldedLogli -- Poisson log-likelihood for the
106  // observations assuming that the
107  // Poisson means are given by the fit.
108  // If the covariance matrix of the
109  // observations was provided by the
110  // user, this variable will be set to
111  // -chi^2/2 instead.
112  //
113  // *unfoldedLogli -- Poisson log-likelihood for the
114  // unfolded distribution assuming that
115  // the Poisson means are given by the
116  // oracle data.
117  //
118  // *unfoldedISE -- Integrated squared error for the
119  // unfolded distribution w.r.t. the
120  // oracle (both distributions are
121  // normalized to 1). This variable is
122  // the easiest one to check in order
123  // to find out whether an oracle was
124  // provided (its value will be negative
125  // if it wasn't).
126  //
127  // *unfoldedDiagChisq -- Chi-squared of the unfolded
128  // distribution w.r.t. the oracle
129  // using only the diagonal elements
130  // of the unfolding covariance matrix.
131  //
132  // *smoothedUnfoldedLogli -- Poisson log-likelihood for the
133  // unfolded distribution assuming that
134  // the Poisson means are given by the
135  // smoothed racle data.
136  //
137  // *smoothedUnfoldedISE -- Integrated squared error for the
138  // unfolded distribution w.r.t. the
139  // smoothed oracle (both distributions
140  // are normalized to 1).
141  //
142  // *smoothedUnfoldedDiagChisq -- Chi-squared of the unfolded
143  // distribution w.r.t. the smoothed
144  // oracle using only the diagonal
145  // elements of the covariance matrix.
146  //
147  // modelNDoFEntropic -- Effective NDoF for the matrix
148  // modelNDoFTrace H = K*E*(K*E)^T which plays a role
149  // similar to the hat matrix in
150  // regression. Here, E is the error
151  // propagation matrix for unfolding
152  // uniform observed distribution.
153  // "Entropic" means that the exponent of
154  // the eigenspectrum entropy is used to
155  // define effective NDoF, and "Trace"
156  // means that the ratio of the matrix
157  // trace to the largest eigenvalue is
158  // used.
159  //
160  // AICcEntropic -- AIC (Akaike information criterion)
161  // AICcTrace with a correction for the finite
162  // sample size. Calculated using
163  // "foldedLogli" and corresponding
164  // "modelNDoF".
165  //
166  // smoothingNormfactor -- Normalization factor applied during
167  // the last smoothing procedure.
168  //
169  // integratedVariance -- Product of the covariance matrix
170  // trace and the bin width in the
171  // unfolded space.
172  //
173  // nIterations -- Number of iterations used to process
174  // this bandwidth (e.g., by the
175  // expectation-maximization method).
176  //
177  // unfoldingStatus -- Status returned by the "unfold" call
178  // of the unfolding object.
179  */
180  virtual std::vector<std::string> variableNames() const;
181 
182  /**
183  // Number of variables calculated by the "process" method.
184  // See the description of "variableNames" method for details.
185  */
186  virtual unsigned variableCount() const;
187 
188  /**
189  // Write out produced variables into a common buffer.
190  // The order of the values will be consistent with the
191  // names returned by the "variableNames" method.
192  // The function returns the number of variables filled.
193  */
194  virtual unsigned ntuplize(double* buf, unsigned len) const;
195 
196  /**
197  // Perform unfolding with the given bandwidth values. This method
198  // returns "true" on success and "false" on failure.
199  */
200  bool process(const std::vector<double>& bandwidthValues);
201 
202  /** Last bandwidth values processed */
203  inline const std::vector<double>& lastBandwidthValues() const
204  {return bandwidthValues_;}
205 
206  /**
207  // Change the array of observations. Array dimensions must be
208  // compatible with those given in the constructor. The pointer
209  // to the covariance matrix of observations can be NULL. If provided,
210  // this matrix should be symmetric and positive-definite.
211  */
212  virtual void setObservedData(const ArrayND<double>& observed,
213  const Matrix<double>* observCovariance);
214 
215  /** Return the observed data */
216  inline const ArrayND<double>& getObservedData() const {return observed_;}
217 
218  /**
219  // Set the bias (this is useful for various studies of uncertainties).
220  // If set, this bias will be added to the smoothed oracle data. The
221  // dimensions must be compatible with those of the unfolded result.
222  */
223  virtual void setBias(const ArrayND<double>& bias);
224 
225  /** Return the bias data provided by the last "setBias" call */
226  inline const ArrayND<double>& getBias() const {return bias_;}
227 
228  /** Clear bias data */
229  inline virtual void clearBias() {bias_.uninitialize();}
230 
231  /** Status of the last unfolding call ("true" means success) */
232  inline virtual bool lastUnfoldingStatus() const
233  {return unfoldingStatus_;}
234 
235  /** Last unfolded distribution */
236  inline const ArrayND<double>& unfoldedResult() const
237  {return unfolded_;}
238 
239  /** Last unfolded covariance matrix */
240  inline const Matrix<double>& unfoldedCovariance() const
241  {return unfoldedCovmat_;}
242 
243  /** Response matrix */
244  inline const ResponseMatrix& responseMatrix() const
245  {return unfold_.responseMatrix();}
246 
247  /**
248  // Return oracle data smoothed with the last processed bandwidth.
249  // If the bias was set, it was added to this data.
250  */
251  inline const ArrayND<double>& smoothedOracle() const
252  {return smoothedOracle_;}
253 
254  /** Return eigenvector differences divided by sigma */
255  inline const std::vector<double>& eigenDeltas() const
256  {return eigenDeltas_;}
257 
258  /** Return covariance matrix eigenvalues (in the decreasing order) */
259  inline const std::vector<double>& covEigenValues() const
260  {return covEigenValues_;}
261 
262  /** Are we using convolutions with our filters? */
263  inline bool usingConvolutions() const
264  {return unfold_.usingConvolutions();}
265 
266  /** Set the initial approximation to the unfolded solution */
267  inline void setInitialApproximation(const ArrayND<double>& approx)
268  {unfold_.setInitialApproximation(approx);}
269 
270  /** Clear the initial approximation to the unfolded solution */
272  {unfold_.clearInitialApproximation();}
273 
274  /** Return the initial approximation to the unfolded solution */
276  {return unfold_.getInitialApproximation();}
277 
278  /** Correction factor for the number of degrees of freedom */
279  inline double nDoFCorrectionFactor() const {return nDoFCorr_;}
280 
281  /** Set the correction factor for the number of degrees of freedom */
282  inline void setNDoFCorrectionFactor(const double f) {nDoFCorr_ = f;}
283 
284  protected:
285  // Return "true" on success
286  virtual bool performUnfolding();
287 
288  // Variables set by the constructor
289  AbsUnfoldND& unfold_;
290  std::vector<SymbetaParams1D> filterParameters_;
291  ArrayND<double> observed_;
292  ArrayND<double> oracle_;
293  Matrix<double> observationCovariance_;
294  Matrix<double> observationErr_;
295  double binVolume_;
296  double nDoFCorr_;
297  double nObserved_;
298  double obsNonZeroFraction_;
299 
300  // Bandwidth is set by the "process" method
301  std::vector<double> bandwidthValues_;
302 
303  // Unfolding results (calculated by the "performUnfolding" method)
304  ArrayND<double> unfolded_;
305  Matrix<double> unfoldedCovmat_;
306  ArrayND<double> smoothedOracle_;
307  std::vector<double> eigenDeltas_;
308  std::vector<double> covEigenValues_;
309 
310  private:
311  // Disable copy constructor an assignment operator
314  UnfoldingBandwidthScannerND& operator=(
316 
317  void getModelNDoF(double* ndof1, double* ndof2,
318  const std::vector<double>& bwValues);
319 
320  double foldedLogLikelihood(const ArrayND<double>& fitted,
321  const ArrayND<double>& observed);
322 
323  static double getNonZeroFraction(const ArrayND<double>&);
324 
325  // Folded distribution estimate
326  ArrayND<double> folded_;
327 
328  // Bias
329  ArrayND<double> bias_;
330 
331  double foldedSum_;
332  double unfoldedSum_;
333  double smoothedOracleSum_;
334  double foldedLogli_;
335 
336  double unfoldedLogli_;
337  double unfoldedISE_;
338  double unfoldedDiagChisq_;
339 
340  double smoothedUnfoldedLogli_;
341  double smoothedUnfoldedISE_;
342  double smoothedUnfoldedDiagChisq_;
343 
344  double modelNDoFEntropic_;
345  double modelNDoFTrace_;
346  double AICcEntropic_;
347  double AICcTrace_;
348 
349  double smoothingNormfactor_;
350  double integratedVariance_;
351  unsigned nIterations_;
352  bool unfoldingStatus_;
353 
354  // Variable set by "variableNames" call
355  mutable unsigned nVariables_;
356 
357  const AbsUnfoldingFilterND* oldFilter_;
358 
359  MemoizingSymbetaFilterProvider filterProvider_;
360  std::map<std::vector<double>, std::pair<double,double> > ndofMap_;
361 
362  const AbsUnfoldingFilterND* localFilter_;
363 
364  std::vector<const LocalPolyFilter1D*> localFilterBuf_;
365  std::vector<double> deltaBuf_;
366  };
367 }
368 
369 #endif // NPSTAT_UNFOLDINGBANDWIDTHSCANNERND_HH_
Interface definition for multivariate unfolding algorithms.
Builds symmetric beta LOrPE filters and remembers these filters when the user sets the corresponding ...
Parameters of 1-d filters from the symmetric beta family.
Definition: AbsUnfoldND.hh:19
virtual const ArrayND< double > & getInitialApproximation() const
virtual void setInitialApproximation(const ArrayND< double > &approx)
bool usingConvolutions() const
Definition: AbsUnfoldND.hh:60
virtual void clearInitialApproximation()
Definition: AbsUnfoldingFilterND.hh:28
ArrayND & uninitialize()
Definition: MemoizingSymbetaFilterProvider.hh:27
Definition: ResponseMatrix.hh:23
Definition: UnfoldingBandwidthScannerND.hh:22
void setInitialApproximation(const ArrayND< double > &approx)
Definition: UnfoldingBandwidthScannerND.hh:267
virtual void setBias(const ArrayND< double > &bias)
unsigned unfoldedDim() const
Definition: UnfoldingBandwidthScannerND.hh:77
UnfoldingBandwidthScannerND(AbsUnfoldND &unfold, const std::vector< SymbetaParams1D > &filterParameters, const ArrayND< double > &observed, double nDoFCorrectionFactor, const Matrix< double > *observationCovariance=0, const ArrayND< double > *oracle=0)
unsigned observedDim() const
Definition: UnfoldingBandwidthScannerND.hh:81
const ResponseMatrix & responseMatrix() const
Definition: UnfoldingBandwidthScannerND.hh:244
const std::vector< double > & covEigenValues() const
Definition: UnfoldingBandwidthScannerND.hh:259
bool process(const std::vector< double > &bandwidthValues)
const ArrayND< double > & getBias() const
Definition: UnfoldingBandwidthScannerND.hh:226
const std::vector< double > & eigenDeltas() const
Definition: UnfoldingBandwidthScannerND.hh:255
const std::vector< double > & lastBandwidthValues() const
Definition: UnfoldingBandwidthScannerND.hh:203
virtual void clearBias()
Definition: UnfoldingBandwidthScannerND.hh:229
virtual void setObservedData(const ArrayND< double > &observed, const Matrix< double > *observCovariance)
const ArrayND< double > & unfoldedResult() const
Definition: UnfoldingBandwidthScannerND.hh:236
virtual unsigned variableCount() const
const ArrayND< double > & smoothedOracle() const
Definition: UnfoldingBandwidthScannerND.hh:251
const ArrayND< double > & getInitialApproximation() const
Definition: UnfoldingBandwidthScannerND.hh:275
virtual unsigned ntuplize(double *buf, unsigned len) const
double nDoFCorrectionFactor() const
Definition: UnfoldingBandwidthScannerND.hh:279
virtual bool lastUnfoldingStatus() const
Definition: UnfoldingBandwidthScannerND.hh:232
void setNDoFCorrectionFactor(const double f)
Definition: UnfoldingBandwidthScannerND.hh:282
bool usingConvolutions() const
Definition: UnfoldingBandwidthScannerND.hh:263
virtual std::vector< std::string > variableNames() const
void clearInitialApproximation()
Definition: UnfoldingBandwidthScannerND.hh:271
const ArrayND< double > & getObservedData() const
Definition: UnfoldingBandwidthScannerND.hh:216
const Matrix< double > & unfoldedCovariance() const
Definition: UnfoldingBandwidthScannerND.hh:240
Definition: AbsArrayProjector.hh:14