npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
dampedNewtonForNeymanOSDE1D.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_DAMPEDNEWTONFORNEYMANOSDE1D_HH_
2 #define NPSTAT_DAMPEDNEWTONFORNEYMANOSDE1D_HH_
3 
4 /*!
5 // \file dampedNewtonForNeymanOSDE1D.hh
6 //
7 // \brief Damped Newton's method for the Neyman OSDE
8 //
9 // Author: I. Volobouev
10 //
11 // March 2023
12 */
13 
14 #include <cmath>
15 #include <cassert>
16 #include <utility>
17 #include <algorithm>
18 
19 #include "npstat/nm/maxAbsValue.hh"
20 
25 
namespace npstat {
    /**
    // Damped Newton's method for minimizing the chi-square of the
    // Neyman OSDE problem, with an optional gradient-descent fallback
    // for regions where the Hessian is not positive definite.
    //
    // The class "ConvergenceCalculator" must have a method
    // "bool converged(...) const" with the same signature as the
    // "converged" method of class "GradientNeymanOSDE1DConvergenceCalc".
    //
    // Arguments:
    //   nosde              -- The problem description. Its "chiSquare"
    //                         method is expected to return the chi-square
    //                         value and to fill the polynomial statistics,
    //                         the gradient, and the Hessian for the given
    //                         coefficients.
    //   initialCoeffs      -- Starting point of the search, an array of
    //                         nCoeffs elements.
    //   shrinkages         -- Shrinkage parameters, an array of
    //                         nShrinkages elements, passed through to
    //                         the "chiSquare" method.
    //   conv               -- Convergence calculator, consulted once per
    //                         Newton iteration.
    //   maxIterations      -- Limit on the number of Newton iterations.
    //                         Note that the same limit is passed,
    //                         separately, to the fallback gradient
    //                         descent call, so the total amount of work
    //                         can exceed maxIterations cycles.
    //   dampingCoefficient -- Scale factor applied to the full Newton
    //                         step. Must be positive (enforced by
    //                         assert); 1.0 corresponds to the undamped
    //                         Newton step.
    //   useGradientDescent -- If true, switch to gradient descent when
    //                         a non-positive-definite Hessian is
    //                         encountered, returning to Newton iterations
    //                         as soon as the descent reaches a region
    //                         with a positive-definite Hessian.
    //
    // Returns a NeymanOSDE1DResult constructed from the final
    // coefficients, polynomial statistics, chi-square, the max absolute
    // gradient component, the NOSDE_NEWTON status tag, the iteration
    // count, the convergence flag, and a flag telling whether the
    // iterations were aborted due to a non-positive-definite Hessian.
    */
    template<class ConvergenceCalculator>
    inline NeymanOSDE1DResult dampedNewtonForNeymanOSDE1D(
        const NeymanOSDE1D& nosde,
        const double* initialCoeffs, const unsigned nCoeffs,
        const double* shrinkages, const unsigned nShrinkages,
        const ConvergenceCalculator& conv, const unsigned maxIterations,
        const double dampingCoefficient = 1.0,
        const bool useGradientDescent = false)
    {
        assert(dampingCoefficient > 0.0);

        // Work buffers. "coeffs" starts as a copy of the initial guess
        // and is updated in place on every Newton step.
        std::vector<double> coeffs(initialCoeffs, initialCoeffs+nCoeffs);
        std::vector<double> polyStats(nShrinkages);
        std::vector<double> prevGradient(nCoeffs);
        std::vector<double> currentGradient(nCoeffs);
        std::vector<double> hessianEigenvalues(nCoeffs);
        std::vector<double> step(nCoeffs);
        Matrix<double> hessian(nCoeffs, nCoeffs);
        Matrix<double> hessianEigenvectors(nCoeffs, nCoeffs);

        // Evaluate the objective at the starting point. This call also
        // fills polyStats, prevGradient, and hessian for the first cycle.
        double prevChisq = nosde.chiSquare(&coeffs[0], nCoeffs,
                                           shrinkages, nShrinkages,
                                           &polyStats[0], &prevGradient[0],
                                           &hessian);
        bool converged = false, badHessian = false;
        unsigned iter = 0;
        for (; iter<maxIterations && !converged; ++iter)
        {
            // Eigendecomposition of the (symmetric) Hessian. Presumably
            // symEigen fills the eigenvalues and the eigenvector matrix
            // -- TODO confirm against the Matrix class documentation.
            hessian.symEigen(&hessianEigenvalues[0], nCoeffs, &hessianEigenvectors);

            // Invert the eigenvalues in place. A non-positive eigenvalue
            // means the Hessian is not positive definite, so a Newton
            // step is not guaranteed to be a descent direction; stop
            // scanning as soon as one is found.
            for (unsigned i=0; i<nCoeffs && !badHessian; ++i)
            {
                if (hessianEigenvalues[i] <= 0.0)
                    badHessian = true;
                else
                    hessianEigenvalues[i] = 1.0/hessianEigenvalues[i];
            }

            if (badHessian)
            {
                // Try to switch to gradient descent.
                // Note that gradient descent by itself is very slow.
                // We are going to terminate it as soon as it brings
                // the search into a region with a positive-definite
                // Hessian.
                if (useGradientDescent)
                {
                    // The wrapper makes the descent stop either when the
                    // Hessian becomes positive definite or when the user's
                    // calculator "conv" itself declares convergence --
                    // presumably; verify against the wrapper's definition.
                    HessianNeymanOSDE1DConvergenceCalc<ConvergenceCalculator> descentConv(conv);
                    const NeymanOSDE1DResult& r = gradientDescentForNeymanOSDE1D(
                        nosde, &coeffs[0], nCoeffs, shrinkages, nShrinkages,
                        descentConv, maxIterations);
                    if (r.converged())
                    {
                        // If the user's convergence criterion was met during
                        // the descent, we are done -- return its result as is.
                        if (descentConv.otherCalcConverged())
                            return r;
                        else
                        {
                            // The descent stopped because the Hessian became
                            // positive definite. Adopt its coefficients and
                            // recalculate the Hessian (and gradient) there,
                            // then resume Newton iterations.
                            for (unsigned i=0; i<nCoeffs; ++i)
                                coeffs[i] = r.coeffs()[i];
                            prevChisq = nosde.chiSquare(&coeffs[0], nCoeffs,
                                                        shrinkages, nShrinkages,
                                                        &polyStats[0], &prevGradient[0],
                                                        &hessian);
                            badHessian = false;
                        }
                    }
                }

                // Did we fix the Hessian? If yes, the new Hessian
                // has to be inverted, so we have to go to the
                // beginning of the cycle. If not, we can't do much
                // with this optimization method.
                if (badHessian)
                    break;
                else
                    continue;
            }

            // Build the inverse Hessian from the inverted eigenvalues:
            // invHess = E^T * diag(1/lambda) * E (up to the convention
            // used by "bilinear" -- TODO confirm), then form the damped
            // Newton step: step = -dampingCoefficient * invHess * grad.
            const Matrix<double>& invEigen = diag(&hessianEigenvalues[0], nCoeffs);
            const Matrix<double>& invHess = invEigen.bilinear(hessianEigenvectors);
            invHess.timesVector(&prevGradient[0], nCoeffs, &step[0], nCoeffs);
            for (unsigned i=0; i<nCoeffs; ++i)
            {
                step[i] *= -dampingCoefficient;
                coeffs[i] += step[i];
            }

            // Re-evaluate the objective at the new point. This also
            // refreshes the gradient and the Hessian for the next cycle.
            const double currentChisq = nosde.chiSquare(&coeffs[0], nCoeffs,
                                                        shrinkages, nShrinkages,
                                                        &polyStats[0], &currentGradient[0],
                                                        &hessian);
            converged = conv.converged(iter, prevChisq, prevGradient, step,
                                       currentChisq, currentGradient, hessian);

            // Roll the state over. Swapping the gradient vectors avoids
            // copying: currentGradient's old contents become scratch space
            // for the next cycle's gradient.
            prevChisq = currentChisq;
            std::swap(prevGradient, currentGradient);
        }

        // Note that on a "badHessian" break the gradient reported below
        // is the one evaluated at the current coefficients (prevGradient
        // was refreshed together with the Hessian).
        return NeymanOSDE1DResult(coeffs, polyStats, prevChisq,
                                  maxAbsValue(&prevGradient[0], prevGradient.size()),
                                  NOSDE_NEWTON, iter, converged, badHessian);
    }
}
133 
134 #endif // NPSTAT_DAMPEDNEWTONFORNEYMANOSDE1D_HH_
Classes for establishing convergence of the Neyman OSDE optimization algorithms.
Class for representing results of the Neyman OSDE optimization algorithms.
OSDE based on minimizing the expected ISE of the empirical comparison density.
Gradient descent method for the Neyman OSDE. Not very well thought out, so it is very slow.
Maximum absolute value in an array.
Definition: AbsArrayProjector.hh:14
Matrix< Numeric > diag(const Numeric *data, unsigned dataLen)