npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
SparseUnfoldingBandwidthScannerND.hh
Go to the documentation of this file.
1 #ifndef EMSUNFOLD_SPARSEUNFOLDINGBANDWIDTHSCANNERND_HH_
2 #define EMSUNFOLD_SPARSEUNFOLDINGBANDWIDTHSCANNERND_HH_
3 
4 /*!
5 // \file SparseUnfoldingBandwidthScannerND.hh
6 //
7 // \brief Characterization of multivariate unfolding performance vs. bandwidth
8 // for unfolding classes that utilize sparse matrices
9 //
10 // Author: I. Volobouev
11 //
12 // July 2014
13 */
14 
15 #include <map>
16 
19 
22 
23 namespace emsunfold {
24  template<class Matrix>
26  {
27  public:
29  typedef typename unfolding_type::response_matrix_type response_matrix_type;
30  typedef typename unfolding_type::input_covariance_type input_covariance_type;
31  typedef typename unfolding_type::output_covariance_type output_covariance_type;
33 
34  /**
35  // The constructor arguments are as follows:
36  //
37  // unfold -- An instance of AbsSparseUnfoldND class.
38  //
39  // filterParameters -- Specifications how to build the
40  // smoothing filters for each dimension
41  // of the unfolded data. The corresponding
42  // values will be eventually passed to the
43  // "symbetaLOrPEFilter1D" call. The number
44  // of elements in this vector must be equal
45  // to the rank of the response matrix used
46  // by the "unfold" argument.
47  //
48  // trlanParameters -- Parameters which steer determination
49  // of covariance matrix eigenspectrum by
50  // TRLAN.
51  //
52  // minAbsoluteCorrelation -- Correlation coefficients of the unfolded
53  // covariance matrix with absolute values
54  // below this cutoff will be set to 0 in
55  // an attempt to reduce the matrix size.
56  // If this argument is 0 or negative, the
57  // unfolded covariance matrix will not be
58  // pruned.
59  //
60  // observed -- Observed data. Can be modified later
61  // using "setObservedData" method. Array
62  // shape, however, can not change.
63  //
64  // nDoFCorrectionFactor -- The correction factor to use for
65  // determining the number of effective
66  // parameters in the fit. If this argument
67  // is positive, it will be multiplied
68  // by the number of parameters determined
69  // by the standard procedure (so set it to
70  // 1.0 in order not to apply any correction).
71  // If this argument is 0 or negative, the
72  // correction factor will be calculated
73  // as the fraction of bins filled in the
74  // observed data.
75  //
76  // observationCovariance -- Covariance matrix for the observed
77  // values. If not NULL, this matrix should
78  // be symmetric and positive-definite.
79  // An internal copy will be made.
80  //
81  // oracle -- The "correct" unfolded distribution
82  // for use in various comparisons with
83  // unfolded results. If oracle is NULL,
84  // this distribution will be considered
85  // unknown, and the corresponding
86  // comparisons will not be made.
87  */
89  unfolding_type& unfold,
90  const std::vector<npstat::SymbetaParams1D>& filterParameters,
91  const EigenParameters& trlanParameters,
92  double minAbsoluteCorrelation,
93  const npstat::ArrayND<double>& observed,
94  double nDoFCorrectionFactor,
95  const input_covariance_type* observationCovariance = 0,
96  const npstat::ArrayND<double>* oracle = 0);
97 
99 
100  /** Dimensionality of the unfolded space */
101  inline unsigned unfoldedDim() const
102  {return filterParameters_.size();}
103 
104  /** Dimensionality of the observed space */
105  inline unsigned observedDim() const
106  {return unfold_.getObservedShape().size();}
107 
108  /**
109  // Names of the variables calculated by calling the "process" method.
110  // The meaning of the variables is described below. Variables which
111  // have * in front of their names will have meaningful values only
112  // in case the known "oracle" distribution was provided in the
113  // constructor.
114  //
115  // bandwidth_N -- With N = 0, 1, ..., up to
116  // filterParameters.size() - 1.
117  // Last bandwidth set processed. All
118  // these values will be set to -2.0 if
119  // "ntuplize" was called before the
120  // first call to "process".
121  //
122  // unprunedNonZeroFrac -- Fraction of non-zero entries in the
123  // unfolding covariance matrix before
124  // pruning of small correlations.
125  //
126  // nonZeroFrac -- Fraction of non-zero entries in the
127  // unfolding covariance matrix after
128  // pruning of small correlations.
129  //
130  // foldedSum -- Sum of the Poisson means fitted to
131  // the observations.
132  //
133  // unfoldedSum -- Sum of the unfolded values.
134  //
135  // *smoothedOracleSum -- Sum of the smoothed oracle data.
136  //
137  // foldedLogli -- Poisson log-likelihood for the
138  // observations assuming that the
139  // Poisson means are given by the fit.
140  // If the covariance matrix of the
141  // observations was provided by the
142  // user, this variable will be set to
143  // -chi^2/2 instead.
144  //
145  // *unfoldedLogli -- Poisson log-likelihood for the
146  // unfolded distribution assuming that
147  // the Poisson means are given by the
148  // oracle data.
149  //
150  // *unfoldedISE -- Integrated squared error for the
151  // unfolded distribution w.r.t. the
152  // oracle (both distributions are
153  // normalized to 1). This variable is
154  // the easiest one to check in order
155  // to find out whether an oracle was
156  // provided (its value will be negative
157  // if it wasn't).
158  //
159  // *unfoldedDiagChisq -- Chi-squared of the unfolded
160  // distribution w.r.t. the oracle
161  // using only the diagonal elements
162  // of the unfolding covariance matrix.
163  //
164  // *smoothedUnfoldedLogli -- Poisson log-likelihood for the
165  // unfolded distribution assuming that
166  // the Poisson means are given by the
167  // smoothed racle data.
168  //
169  // *smoothedUnfoldedISE -- Integrated squared error for the
170  // unfolded distribution w.r.t. the
171  // smoothed oracle (both distributions
172  // are normalized to 1).
173  //
174  // *smoothedUnfoldedDiagChisq -- Chi-squared of the unfolded
175  // distribution w.r.t. the smoothed
176  // oracle using only the diagonal
177  // elements of the covariance matrix.
178  //
179  // modelNDoFEntropic -- Effective NDoF for the matrix
180  // modelNDoFTrace H = K*E*(K*E)^T which plays a role
181  // similar to the hat matrix in
182  // regression. Here, E is the error
183  // propagation matrix for unfolding
184  // uniform observed distribution.
185  // "Entropic" means that the exponent of
186  // the eigenspectrum entropy is used to
187  // define effective NDoF, and "Trace"
188  // means that the ratio of the matrix
189  // trace to the largest eigenvalue is
190  // used.
191  //
192  // AICcEntropic -- AIC (Akaike information criterion)
193  // AICcTrace with a correction for the finite
194  // sample size. Calculated using
195  // "foldedLogli" and corresponding
196  // "modelNDoF".
197  //
198  // smoothingNormfactor -- Normalization factor applied during
199  // the last smoothing procedure.
200  //
201  // integratedVariance -- Product of the covariance matrix
202  // trace and the bin width in the
203  // unfolded space.
204  //
205  // nIterations -- Number of iterations used to process
206  // this bandwidth (e.g., by the
207  // expectation-maximization method).
208  //
209  // unfoldingStatus -- Status returned by the "unfold" call
210  // of the unfolding object.
211  //
212  // trlan_* -- Multiple variables which provide
213  // TRLAN diagnostic information. See
214  // the description of TrlanDiagnostics
215  // class.
216  */
217  virtual std::vector<std::string> variableNames() const;
218 
219  /**
220  // Number of variables calculated by the "process" method.
221  // See the description of "variableNames" method for details.
222  */
223  virtual unsigned variableCount() const;
224 
225  /**
226  // Write out produced variables into a common buffer.
227  // The order of the values will be consistent with the
228  // names returned by the "variableNames" method.
229  // The function returns the number of variables filled.
230  */
231  virtual unsigned ntuplize(double* buf, unsigned len) const;
232 
233  /**
234  // Perform unfolding with the given bandwidth values. This method
235  // returns "true" on success and "false" on failure.
236  */
237  bool process(const std::vector<double>& bandwidthValues);
238 
239  /** Last bandwidth values processed */
240  inline const std::vector<double>& lastBandwidthValues() const
241  {return bandwidthValues_;}
242 
243  /**
244  // Change the array of observations. Array dimensions must be
245  // compatible with those given in the constructor. The pointer
246  // to the covariance matrix of observations can be NULL. If provided,
247  // this matrix should be symmetric and positive-definite.
248  */
249  virtual void setObservedData(
250  const npstat::ArrayND<double>& observed,
251  const input_covariance_type* observationCovarianceMatrix);
252 
253  /** Return the observed data */
255  {return observed_;}
256 
257  /**
258  // Set the bias (this is useful for various studies of uncertainties).
259  // If set, this bias will be added to the smoothed oracle data. The
260  // dimensions must be compatible with those of the unfolded result.
261  */
262  virtual void setBias(const npstat::ArrayND<double>& bias);
263 
264  /** Return the bias data provided by the last "setBias" call */
265  inline const npstat::ArrayND<double>& getBias() const {return bias_;}
266 
267  /** Clear bias data */
268  inline virtual void clearBias() {bias_.uninitialize();}
269 
270  /** Status of the last unfolding call ("true" means success) */
271  inline virtual bool lastSparseUnfoldingStatus() const
272  {return unfoldingStatus_;}
273 
274  /** Last unfolded distribution */
276  {return unfolded_;}
277 
278  /** Last unfolded covariance matrix */
279  inline const output_covariance_type& unfoldedCovariance() const
280  {return unfoldedCovmat_;}
281 
282  /** Response matrix */
283  inline const response_matrix_type& responseMatrix() const
284  {return unfold_.responseMatrix();}
285 
286  /**
287  // Return oracle data smoothed with the last processed bandwidth.
288  // If the bias was set, it was added to this data.
289  */
291  {return smoothedOracle_;}
292 
293  /** Return eigenvector differences divided by sigma */
294  inline const std::vector<double>& eigenDeltas() const
295  {return eigenDeltas_;}
296 
297  /** Return covariance matrix eigenvalues (in the decreasing order) */
298  inline const std::vector<double>& covEigenValues() const
299  {return covEigenValues_;}
300 
301  /** Are we using convolutions with our filters? */
302  inline bool usingConvolutions() const
303  {return unfold_.usingConvolutions();}
304 
305  /** Set the initial approximation to the unfolded solution */
307  {unfold_.setInitialApproximation(a);}
308 
309  /** Clear the initial approximation to the unfolded solution */
311  {unfold_.clearInitialApproximation();}
312 
313  /** Return the initial approximation to the unfolded solution */
315  {return unfold_.getInitialApproximation();}
316 
317  /** Correction factor for the number of degrees of freedom */
318  inline double nDoFCorrectionFactor() const {return nDoFCorr_;}
319 
320  /** Set the correction factor for the number of degrees of freedom */
321  inline void setNDoFCorrectionFactor(const double f) {nDoFCorr_ = f;}
322 
323  protected:
324  // Return "true" on success
325  virtual bool performUnfolding();
326 
327  // Variables set by the constructor
328  unfolding_type& unfold_;
329  std::vector<npstat::SymbetaParams1D> filterParameters_;
330  EigenParameters trlanParameters_;
331  double minAbsoluteCorrelation_;
332  npstat::ArrayND<double> observed_;
333  npstat::ArrayND<double> oracle_;
334  input_covariance_type observationCovariance_;
335  bool haveObservationCovariance_;
336  double binVolume_;
337  double nDoFCorr_;
338  double nObserved_;
339  double obsNonZeroFraction_;
340 
341  // Bandwidth is set by the "process" method
342  std::vector<double> bandwidthValues_;
343 
344  // Unfolding results (calculated by the "performUnfolding" method)
345  npstat::ArrayND<double> unfolded_;
346  output_covariance_type unfoldedCovmat_;
347  npstat::ArrayND<double> smoothedOracle_;
348  std::vector<double> eigenDeltas_;
349  std::vector<double> covEigenValues_;
350  TrlanDiagnostics diagnose_;
351 
352  private:
353  // Disable copy constructor an assignment operator
358 
359  void getModelNDoF(double* ndof1, double* ndof2,
360  const std::vector<double>& bwValues);
361 
362  double foldedLogLikelihood(const npstat::ArrayND<double>& fitted,
363  const npstat::ArrayND<double>& observed);
364 
365  bool unfoldAndPrune(const npstat::ArrayND<double>& observed,
366  const input_covariance_type* observationCovMat,
367  double* initialNonzeroFraction);
368 
369  static double getNonZeroFraction(const npstat::ArrayND<double>&);
370 
371  // Folded distribution estimate
372  npstat::ArrayND<double> folded_;
373 
374  // Bias
376 
377  double unprunedNonZeroFrac_;
378  double nonZeroFrac_;
379 
380  double foldedSum_;
381  double unfoldedSum_;
382  double smoothedOracleSum_;
383  double foldedLogli_;
384 
385  double unfoldedLogli_;
386  double unfoldedISE_;
387  double unfoldedDiagChisq_;
388 
389  double smoothedUnfoldedLogli_;
390  double smoothedUnfoldedISE_;
391  double smoothedUnfoldedDiagChisq_;
392 
393  double modelNDoFEntropic_;
394  double modelNDoFTrace_;
395  double AICcEntropic_;
396  double AICcTrace_;
397 
398  double smoothingNormfactor_;
399  double integratedVariance_;
400  unsigned nIterations_;
401  bool unfoldingStatus_;
402 
403  // Variable set by "variableNames" call
404  mutable unsigned nVariables_;
405 
406  const filter_type* oldFilter_;
407 
409  std::map<std::vector<double>, std::pair<double,double> > ndofMap_;
410 
411  const filter_type* localFilter_;
412 
413  std::vector<const npstat::LocalPolyFilter1D*> localFilterBuf_;
414  std::vector<double> deltaBuf_;
415  };
416 }
417 
418 #include "npstat/emsunfold/SparseUnfoldingBandwidthScannerND.icc"
419 
420 #endif // EMSUNFOLD_SPARSEUNFOLDINGBANDWIDTHSCANNERND_HH_
Interface definition for multivariate unfolding algorithms that use sparse matrices.
Builds symmetric beta LOrPE filters and remembers these filters when the user sets the corresponding ...
Parameters of 1-d filters from the symmetric beta family.
Definition: AbsSparseUnfoldND.hh:22
virtual void setInitialApproximation(const npstat::ArrayND< double > &a)
virtual void clearInitialApproximation()
npstat::ArrayShape getObservedShape() const
Definition: AbsSparseUnfoldND.hh:72
virtual const npstat::ArrayND< double > & getInitialApproximation() const
bool usingConvolutions() const
Definition: AbsSparseUnfoldND.hh:69
Definition: AbsSparseUnfoldingFilterND.hh:27
Definition: EigenParameters.hh:20
Definition: SparseUnfoldingBandwidthScannerND.hh:26
void clearInitialApproximation()
Definition: SparseUnfoldingBandwidthScannerND.hh:310
virtual void setObservedData(const npstat::ArrayND< double > &observed, const input_covariance_type *observationCovarianceMatrix)
const npstat::ArrayND< double > & getInitialApproximation() const
Definition: SparseUnfoldingBandwidthScannerND.hh:314
const output_covariance_type & unfoldedCovariance() const
Definition: SparseUnfoldingBandwidthScannerND.hh:279
virtual unsigned ntuplize(double *buf, unsigned len) const
const response_matrix_type & responseMatrix() const
Definition: SparseUnfoldingBandwidthScannerND.hh:283
virtual void setBias(const npstat::ArrayND< double > &bias)
bool usingConvolutions() const
Definition: SparseUnfoldingBandwidthScannerND.hh:302
const npstat::ArrayND< double > & smoothedOracle() const
Definition: SparseUnfoldingBandwidthScannerND.hh:290
unsigned observedDim() const
Definition: SparseUnfoldingBandwidthScannerND.hh:105
const npstat::ArrayND< double > & unfoldedResult() const
Definition: SparseUnfoldingBandwidthScannerND.hh:275
void setNDoFCorrectionFactor(const double f)
Definition: SparseUnfoldingBandwidthScannerND.hh:321
const std::vector< double > & covEigenValues() const
Definition: SparseUnfoldingBandwidthScannerND.hh:298
const std::vector< double > & lastBandwidthValues() const
Definition: SparseUnfoldingBandwidthScannerND.hh:240
SparseUnfoldingBandwidthScannerND(unfolding_type &unfold, const std::vector< npstat::SymbetaParams1D > &filterParameters, const EigenParameters &trlanParameters, double minAbsoluteCorrelation, const npstat::ArrayND< double > &observed, double nDoFCorrectionFactor, const input_covariance_type *observationCovariance=0, const npstat::ArrayND< double > *oracle=0)
const std::vector< double > & eigenDeltas() const
Definition: SparseUnfoldingBandwidthScannerND.hh:294
virtual std::vector< std::string > variableNames() const
bool process(const std::vector< double > &bandwidthValues)
unsigned unfoldedDim() const
Definition: SparseUnfoldingBandwidthScannerND.hh:101
virtual void clearBias()
Definition: SparseUnfoldingBandwidthScannerND.hh:268
const npstat::ArrayND< double > & getObservedData() const
Definition: SparseUnfoldingBandwidthScannerND.hh:254
double nDoFCorrectionFactor() const
Definition: SparseUnfoldingBandwidthScannerND.hh:318
virtual bool lastSparseUnfoldingStatus() const
Definition: SparseUnfoldingBandwidthScannerND.hh:271
const npstat::ArrayND< double > & getBias() const
Definition: SparseUnfoldingBandwidthScannerND.hh:265
void setInitialApproximation(const npstat::ArrayND< double > &a)
Definition: SparseUnfoldingBandwidthScannerND.hh:306
Definition: trlanEigensystem.hh:24
ArrayND & uninitialize()
Definition: MemoizingSymbetaFilterProvider.hh:27
Definition: AbsSparseUnfoldingFilterND.hh:25
Determination of eigenvalues/vectors of covariance matrices with TRLAN.