npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
LocalLogisticRegression.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_LOCALLOGISTICREGRESSION_HH_
2 #define NPSTAT_LOCALLOGISTICREGRESSION_HH_
3 
4 /*!
5 // \file LocalLogisticRegression.hh
6 //
7 // \brief Facilities for performing local linear and quadratic
8 // logistic regression
9 //
10 // Classes declared in this header can be used with the
11 // top-level driver functions minuitUnbinnedLogisticRegression
12 // and minuitLogisticRegressionOnGrid declared in the
13 // minuitLocalRegression.hh header (interfaces section).
14 //
15 // Author: I. Volobouev
16 //
17 // March 2010
18 */
19 
20 #include "npstat/nm/ArrayND.hh"
21 #include "npstat/nm/KDTree.hh"
22 
24 
25 namespace npstat {
26  /** Base class for logistic regression */
27  template <class Numeric>
29  {
30  public:
32  bool calculateLikelihoodGradient);
33  inline virtual ~LogisticRegressionBase() {}
34 
35  //@{
36  /** Inspect object properties */
37  inline unsigned dim() const {return mydim_;}
38  inline const QuadraticOrthoPolyND& getPoly() const {return poly_;}
39  inline bool calculatingGradient() const {return calcGradient_;}
40  //@}
41 
42  //@{
43  /** Inspect results of the most recent fit */
44  inline const std::vector<double>& lastCoeffs() const {return coeffs_;}
45  void getGradient(double* buffer, unsigned bufLen) const;
46  inline double getPassCount() const
47  {return static_cast<double>(passCount_);}
48  inline double getFailCount() const
49  {return static_cast<double>(failCount_);}
50  //@}
51 
52  /**
53  // Only the data inside the regression box will be used
54  // in the fit (and some points may be subsequently weighted
55  // down to zero by the weight function).
56  //
57  // Note that the "setRegressionBox" method automatically
58  // sets up the standard mapping: the edges of the
59  // regression box are mapped into the boundaries which
60  // were used to create the orthogonal polynomials. If the
61  // standard mapping is not what you want (for example,
62  // this might be the case near the boundaries of the
63  // fitted region), you must call "setLinearMapping" after
64  // calling "setRegressionBox" in order to override the
65  // standard mapping.
66  //
67  // This method must be called at least once after the object
68  // is constructed and before any call to "calculateLogLikelihood".
69  */
70  void setRegressionBox(const BoxND<Numeric>& box);
71 
72  /**
73  // The next method sets up a mapping from the fitted region
74  // into the region which was used to create the orthogonal
75  // polynomials. The mapping is linear in every coordinate
76  // separately: ortho[i] = fitted[i]*scale[i] + location[i].
77  // Note that the "setRegressionBox" method automatically sets
78  // up the default mapping which will be adequate for most
79  // applications.
80  */
81  void setLinearMapping(const double* location, const double* scale,
82  unsigned locationAndScaleArraySize);
83 
84  /**
85  // The next method calculates the negative log-likelihood of
86  // the local logistic regression fit for the current mapping.
87  // In case the corresponding flag was set "true" in the constructor,
88  // the gradient of this quantity will be calculated as well.
89  // The gradient can be subsequently retrieved with the "getGradient"
90  // method.
91  */
92  double calculateLogLikelihood(const double* coeffs, unsigned nCoeffs);
93 
94  protected:
95  void resetAccumulators();
96 
97  const QuadraticOrthoPolyND& poly_;
98 
99  // Various vectors with size equal to the space dimensionality
100  BoxND<Numeric> regressionBox_;
101  std::vector<double> location_;
102  std::vector<double> scale_;
103  std::vector<double> coords_;
104 
105  // Various vectors with size equal to the number of orthogonal
106  // polynomials
107  std::vector<double> coeffs_;
108  std::vector<double> gradBuffer_;
109  std::vector<long double> gradient_;
110 
111  // Main accumulator for the likelihood
112  long double logli_;
113 
114  // Counters for passing and failing entries
115  long double passCount_;
116  long double failCount_;
117 
118  // Stuff which does not change
119  const double minlog_;
120  const double maxlog_;
121  const unsigned mydim_;
122  const bool calcGradient_;
123 
124  private:
128 
129  virtual void makeStandardMapping();
130  virtual void actualLogliCalculation() = 0;
131 
132  bool mappingIsDone_;
133  };
134 
135 
136  /** Logistic regression on data samples arranged into k-d trees */
137  template<class Point, class Numeric, class BooleanFunctor>
139  public AbsVisitor<Point, double>
140  {
141  public:
142  /**
143  // Constructor arguments are as follows:
144  //
145  // dataTree -- the tree of data points.
146  //
147  // pointPassesOrFails -- a functor that tells us whether the
148  // point "passes" (observed value of 1)
149  // or "fails" (observed value of 0).
150  //
151  // poly -- the set of orthogonal polynomials used to
152  // construct the local regression surface.
153  //
154  // calculateLikelihoodGradient -- flag which tells whether the
155  // code should calculate the gradient
156  // of log-likelihood with respect to
157  // coefficients of the local polynomial.
158  //
159  // This object will not own "dataTree" or "poly" objects. These
160  // objects must still exist when the LogisticRegressionOnKDTree
161  // object is in use. The point selection functor will be copied.
162  */
164  const KDTree<Point,Numeric>& dataTree,
165  const BooleanFunctor& pointPassesOrFails,
166  const QuadraticOrthoPolyND& poly,
167  bool calculateLikelihoodGradient);
168 
169  inline virtual ~LogisticRegressionOnKDTree() {}
170 
171  /** Inspect the data */
172  inline const KDTree<Point,Numeric>& getDataTree() const
173  {return dataTree_;}
174 
175  //@{
176  /** Method from AbsVisitor we have to implement */
177  inline virtual void clear() {this->resetAccumulators();}
178  inline virtual double result()
179  {return static_cast<double>(-this->logli_);}
180  virtual void process(const Point& value);
181  //@}
182 
183  private:
184  const KDTree<Point,Numeric>& dataTree_;
185  BooleanFunctor pointPassesOrFails_;
186 
187  inline void actualLogliCalculation()
188  {dataTree_.visitInBox(*this, this->regressionBox_);}
189  };
190 
191 
192  /** Logistic regression on regularly sampled data */
193  template <typename Numeric, unsigned StackLen=1U, unsigned StackDim=10U>
195  public AbsArrayProjector<Numeric, double>
196  {
197  public:
198  /**
199  // Constructor arguments are as follows:
200  //
201  // numerator -- count of points which "pass" for
202  // this grid cell
203  //
204  // denominator -- count of all points associated with
205  // this grid cell
206  //
207  // poly -- the set of orthogonal polynomial used to
208  // construct the local regression surface.
209  //
210  // calculateLikelihoodGradient -- flag which tells whether
211  // the code should calculate the gradient
212  // of log-likelihood with respect to
213  // coefficients of the local polynomial.
214  //
215  // This object will not own "numerator", "denominator",
216  // or "poly" objects. These objects must still exist when
217  // the LogisticRegressionOnGrid object is in use.
218  //
219  // The "standard" situation here is that the regression
220  // box is simply taken to be an integer-sized box whose
221  // length is the same as the number of steps used to
222  // build the polynomials.
223  */
225  const ArrayND<Numeric,StackLen,StackDim>& numerator,
226  const ArrayND<Numeric,StackLen,StackDim>& denominator,
227  const QuadraticOrthoPolyND& poly,
228  bool calculateLikelihoodGradient);
229 
230  inline virtual ~LogisticRegressionOnGrid() {}
231 
232  //@{
233  /** Inspect object properties */
235  {return numerator_;}
236  inline const ArrayND<Numeric,StackLen,StackDim>& getDenominator() const
237  {return denominator_;}
238  //@}
239 
240  //@{
241  /** Method from AbsArrayProjector we have to implement */
242  inline virtual void clear() {this->resetAccumulators();}
243  inline virtual double result()
244  {return static_cast<double>(-this->logli_);}
245  virtual void process(const unsigned *index, unsigned indexLen,
246  unsigned long linearIndex, const Numeric& value);
247  //@}
248 
249  private:
250  const ArrayND<Numeric,StackLen,StackDim>& numerator_;
251  const ArrayND<Numeric,StackLen,StackDim>& denominator_;
252  const Numeric zero_;
253 
254  void makeStandardMapping();
255  inline void actualLogliCalculation()
256  {denominator_.processSubrange(*this, this->regressionBox_);}
257  };
258 }
259 
260 #include "npstat/stat/LocalLogisticRegression.icc"
261 
262 #endif // NPSTAT_LOCALLOGISTICREGRESSION_HH_
Arbitrary-dimensional array template.
k-d tree template
Multivariate quadratic orthogonal polynomials with arbitrary weight functions.
Definition: ArrayND.hh:93
void processSubrange(AbsArrayProjector< Numeric, Num2 > &f, const BoxND< Integer > &subrange) const
void visitInBox(AbsVisitor< Point, Result > &visitor, const BoxND< Numeric > &box) const
Definition: LocalLogisticRegression.hh:29
void setLinearMapping(const double *location, const double *scale, unsigned locationAndScaleArraySize)
unsigned dim() const
Definition: LocalLogisticRegression.hh:37
const std::vector< double > & lastCoeffs() const
Definition: LocalLogisticRegression.hh:44
double calculateLogLikelihood(const double *coeffs, unsigned nCoeffs)
void setRegressionBox(const BoxND< Numeric > &box)
Definition: LocalLogisticRegression.hh:196
const ArrayND< Numeric, StackLen, StackDim > & getNumerator() const
Definition: LocalLogisticRegression.hh:234
LogisticRegressionOnGrid(const ArrayND< Numeric, StackLen, StackDim > &numerator, const ArrayND< Numeric, StackLen, StackDim > &denominator, const QuadraticOrthoPolyND &poly, bool calculateLikelihoodGradient)
virtual double result()
Definition: LocalLogisticRegression.hh:243
virtual void process(const unsigned *index, unsigned indexLen, unsigned long linearIndex, const Numeric &value)
virtual void clear()
Definition: LocalLogisticRegression.hh:242
Definition: LocalLogisticRegression.hh:140
LogisticRegressionOnKDTree(const KDTree< Point, Numeric > &dataTree, const BooleanFunctor &pointPassesOrFails, const QuadraticOrthoPolyND &poly, bool calculateLikelihoodGradient)
virtual void process(const Point &value)
const KDTree< Point, Numeric > & getDataTree() const
Definition: LocalLogisticRegression.hh:172
virtual void clear()
Definition: LocalLogisticRegression.hh:177
virtual double result()
Definition: LocalLogisticRegression.hh:178
Definition: QuadraticOrthoPolyND.hh:24
Definition: AbsArrayProjector.hh:14
Definition: AbsArrayProjector.hh:21
Definition: AbsVisitor.hh:20
Definition: BoxND.hh:25