npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
AbsNtuple.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_ABSNTUPLE_HH_
2 #define NPSTAT_ABSNTUPLE_HH_
3 
4 /*!
5 // \file AbsNtuple.hh
6 //
7 // \brief Interface definition for homogeneous ntuples (point clouds)
8 //
9 // Author: I. Volobouev
10 //
11 // November 2010
12 */
13 
14 #include <string>
15 #include <vector>
16 #include <climits>
17 #include <typeinfo>
18 #include <iterator>
19 #include <stdexcept>
20 
21 #include "geners/ClassId.hh"
22 #include "geners/binaryIO.hh"
23 #include "geners/allUnique.hh"
24 
25 #include "npstat/stat/Column.hh"
26 
27 #ifdef SWIG
28 #include "npstat/stat/NtRectangularCut.hh" // NtRectangularCut<T> is needed by the SWIG-only cutCycleOverRows
29 #endif // SWIG
30 
31 namespace npstat {
32  /**
33  // Interface class for ntuples. Here, ntuples are homogeneous 2-d tables
34  // in which the number of columns is fixed while the number of rows can
35  // grow dynamically.
36  */
37  template <typename T>
38  class AbsNtuple
39  {
40  public:
41  typedef T value_type;
42 
43  /**
44  // Construct from a vector of column names (must be non-empty, with no
45  // repeated names) and a title c-string (NULL gives an empty title).
46  */
47  inline AbsNtuple(const std::vector<std::string>& columnNames,
48  const char* ntTitle)
49  : colNames_(columnNames), title_(ntTitle ? ntTitle : "") // NULL title becomes an empty string
50  {
51  if (columnNames.empty()) // at least one column is required
52  throw std::invalid_argument("In npstat::AbsNtuple constructor:"
53  " no column labels provided");
54  if (!gs::allUnique(columnNames)) // duplicate column labels are rejected
55  throw std::invalid_argument("In npstat::AbsNtuple constructor:"
56  " column labels are not unique");
57  }
58 
59  inline virtual ~AbsNtuple() {}
60 
61  /** Retrieve the ntuple title */
62  inline const std::string& title() const {return title_;}
63 
64  /** Set the ntuple title */
65  inline virtual void setTitle(const char* newtitle)
66  {title_ = newtitle ? newtitle : "";}
67 
68  /** Retrieve the number of columns */
69  inline unsigned long nColumns() const
70  {return colNames_.size();}
71 
72  /** Retrieve the name for the given column */
73  inline const std::string& columnName(const unsigned long i) const
74  {return colNames_.at(i);}
75 
76  /** Retrieve all column names */
77  inline const std::vector<std::string>& columnNames() const
78  {return colNames_;}
79 
80  /**
81  // The code will refuse to set the column name (and false will be
82  // returned in this case) if the provided name duplicates an existing
83  // column name. False will also be returned if the column index
84  // is out of range. Derived classes can also refuse to change the
85  // column name if their implementation relies in some way on the
86  // permanence of these names.
87  */
88  virtual bool setColumnName(unsigned long i, const char* newname);
89 
90  /**
91  // This method returns nColumns() in case the
92  // given column name is not valid
93  */
94  unsigned long columnNumber(const char* columnName) const;
95 
96  /**
97  // This method works just like columnNumber but
98  // generates a dynamic fault in case the given
99  // column name is invalid
100  */
101  unsigned long validColumn(const char* columnName) const;
102 
103  /** Retrieve the number of rows */
104  virtual unsigned long nRows() const = 0;
105 
106  /** Retrieve the total number of ntuple elements */
107  inline unsigned long length() const {return nRows()*colNames_.size();}
108 
109  /**
110  // The number of values provided, "lenValues", should be
111  // divisible by the number of columns. If it is not, the
112  // function should throw "std::invalid_argument" exception.
113  */
114  virtual void fill(const T* values, unsigned long lenValues) = 0;
115 
116  //@{
117  /**
118  // Convenience method which works if the number of arguments equals
119  // the number of columns (otherwise an exception will be thrown)
120  */
121  virtual void fill(const T& v0) = 0;
122  virtual void fill(const T& v0, const T& v1) = 0;
123  virtual void fill(const T& v0, const T& v1, const T& v2) = 0;
124  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3)=0;
125  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
126  const T& v4) = 0;
127  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
128  const T& v4, const T& v5) = 0;
129  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
130  const T& v4, const T& v5, const T& v6) = 0;
131  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
132  const T& v4, const T& v5, const T& v6, const T& v7)=0;
133  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
134  const T& v4, const T& v5, const T& v6, const T& v7,
135  const T& v8) = 0;
136  virtual void fill(const T& v0, const T& v1, const T& v2, const T& v3,
137  const T& v4, const T& v5, const T& v6, const T& v7,
138  const T& v8, const T& v9) = 0;
139  //@}
140 
141  /**
142  // Append contents of another ntuple. That ntuple must have
143  // the same number of columns. The copy constructor must exist
144  // which builds elements of this ntuple from the elements of
145  // another ntuple.
146  */
147  template <typename T2>
148  void append(const AbsNtuple<T2>& another);
149 
150  /**
151  // Access individual elements (no bounds checking).
152  // Note that access is by value, so it involves a fair
153  // amount of copying. The other alternative, return by
154  // reference, could lead to subtle bugs with references
155  // invalidated by fill operations and buffer swapping.
156  */
157  virtual T operator()(unsigned long r, unsigned long c) const=0;
158 
159  /** Access individual elements with bounds checking */
160  virtual T at(unsigned long r, unsigned long c) const = 0;
161 
162  /**
163  // Access with flexible column argument. The "Column" class has
164  // converting constructors from unsigned long, const char*, and
165  // std::string&. All these can now be used as the second argument.
166  // Note that this type of access will be slower than access by
167  // simple indices, so don't use this method in tight loops.
168  */
169  T element(unsigned long r, const Column& c) const;
170 
171  /** Similar method with bounds checking */
172  T elementAt(unsigned long r, const Column& c) const;
173 
174  //@{
175  /**
176  // Fetch copies of rows/columns. The buffer should be at least
177  // as large as the number of elements expected in return.
178  */
179  virtual void rowContents(unsigned long row, T* buf,
180  unsigned long lenBuf) const = 0;
181  virtual void columnContents(const Column& c, T* buf,
182  unsigned long lenBuf) const = 0;
183  //@}
184 
185  /**
186  // Clear the data (if possible). Note that certain disk-based
187  // implementations may ignore this. Check the number of rows
188  // to make sure that the data was indeed cleared.
189  */
190  virtual void clear() = 0;
191 
192  /**
193  // Iterator over the contents of one column (it cycles over row numbers).
194  // Dereferencing returns the element by value. Note that the rowContents/
195  // columnContents methods access the data faster than these iterators.
196  */
197  class column_iterator
198  {
199  public:
200  typedef T value_type;
201  typedef std::forward_iterator_tag iterator_category;
202 
203  column_iterator();
204 
205  T operator*() const;
206  column_iterator& operator++();
207  column_iterator operator++(int);
208  bool operator==(const column_iterator&) const;
209  bool operator!=(const column_iterator&) const;
210  bool operator<(const column_iterator&) const;
211 
212  private:
213  friend class AbsNtuple;
214  const AbsNtuple<T>* nt_;
215  unsigned long column_;
216  unsigned long row_;
217  };
218 
219  /** Iterator over the contents of one row (it cycles over column numbers); dereferencing returns the element by value */
220  class row_iterator
221  {
222  public:
223  typedef T value_type;
224  typedef std::forward_iterator_tag iterator_category;
225 
226  row_iterator();
227 
228  T operator*() const;
229  row_iterator& operator++();
230  row_iterator operator++(int);
231  bool operator==(const row_iterator&) const;
232  bool operator!=(const row_iterator&) const;
233  bool operator<(const row_iterator&) const;
234 
235  private:
236  friend class AbsNtuple;
237  const AbsNtuple<T>* nt_;
238  unsigned long column_;
239  unsigned long row_;
240  };
241 
242  //@{
243  /**
244  // Methods which return begin/end positions for
245  // the row/column iterators
246  */
247  row_iterator row_begin(unsigned long rowNumber) const;
248  row_iterator row_end() const;
249  column_iterator column_begin(const Column& column) const;
250  column_iterator column_end() const;
251  //@}
252 
253  /**
254  // Function for cycling over all rows. The accumulator class
255  // must implement the following function:
256  //
257  // void accumulate(T* rowContents, unsigned long nColumns)
258  */
259  template <class Accumulator>
260  void cycleOverRows(Accumulator& acc) const;
261 
262  /**
263  // Same as cycleOverRows, but the accumulator will be called
264  // only if the filter returns "true". The filter must be
265  // a functor which implements
266  //
267  // bool operator()(unsigned long rowNumber,
268  // const T* rowContents, unsigned long nColumns) const
269  //
270  // The function returns the number of rows passing the filter.
271  */
272  template <class Filter, class Accumulator>
273  unsigned long conditionalCycleOverRows(
274  const Filter& f, Accumulator& acc) const;
275 
276  /** Cycle over rows counting how many rows pass the filter */
277  template <class Filter>
278  unsigned long conditionalRowCount(const Filter& f) const;
279 
280  /**
281  // Another row cycling function which employs a functor that
282  // calculates a weight. The weight functor must implement
283  //
284  // double operator()(unsigned long rowNumber,
285  // const T* rowContents, unsigned long nCols) const
286  //
287  // The returned weights must be non-negative. The accumulator must
288  // implement
289  //
290  // void accumulate(T* rowContents, unsigned long nColumns, double w)
291  //
292  // The option "skipZeroWeights" allows the user not to call the
293  // accumulator if the calculated weight is 0.
294  */
295  template <class Accumulator, class WeightCalc>
296  void weightedCycleOverRows(Accumulator& acc, const WeightCalc& wcalc,
297  bool skipZeroWeights = false) const;
298 
299  /**
300  // This method returns the sum of weights for rows
301  // that pass the filter and calls the accumulator as appropriate
302  */
303  template <class Filter, class Accumulator, class WeightCalc>
304  double weightedConditionalCycleOverRows(
305  const Filter& f, Accumulator& acc, const WeightCalc& wcalc,
306  bool skipZeroWeights = false) const;
307 
308  /**
309  // This method just returns the sum of weights for rows
310  // that pass the filter
311  */
312  template <class Filter, class WeightCalc>
313  double weightedConditionalRowCount(
314  const Filter& f, const WeightCalc& wcalc) const;
315 
316  //@{
317  /**
318  // Convenience method which returns a collection of column
319  // indices using a set of inhomogeneous column descriptions as
320  // an argument. Exception will be thrown if the column
321  // does not exist.
322  */
323  std::vector<unsigned long> columnIndices(const Column& c0) const;
324  std::vector<unsigned long> columnIndices(const Column& c0,
325  const Column& c1) const;
326  std::vector<unsigned long> columnIndices(const Column& c0,
327  const Column& c1,
328  const Column& c2) const;
329  std::vector<unsigned long> columnIndices(const Column& c0,
330  const Column& c1,
331  const Column& c2,
332  const Column& c3) const;
333  std::vector<unsigned long> columnIndices(const Column& c0,
334  const Column& c1,
335  const Column& c2,
336  const Column& c3,
337  const Column& c4) const;
338  std::vector<unsigned long> columnIndices(const Column& c0,
339  const Column& c1,
340  const Column& c2,
341  const Column& c3,
342  const Column& c4,
343  const Column& c5) const;
344  std::vector<unsigned long> columnIndices(const Column& c0,
345  const Column& c1,
346  const Column& c2,
347  const Column& c3,
348  const Column& c4,
349  const Column& c5,
350  const Column& c6) const;
351  std::vector<unsigned long> columnIndices(const Column& c0,
352  const Column& c1,
353  const Column& c2,
354  const Column& c3,
355  const Column& c4,
356  const Column& c5,
357  const Column& c6,
358  const Column& c7) const;
359  std::vector<unsigned long> columnIndices(const Column& c0,
360  const Column& c1,
361  const Column& c2,
362  const Column& c3,
363  const Column& c4,
364  const Column& c5,
365  const Column& c6,
366  const Column& c7,
367  const Column& c8) const;
368  std::vector<unsigned long> columnIndices(const Column& c0,
369  const Column& c1,
370  const Column& c2,
371  const Column& c3,
372  const Column& c4,
373  const Column& c5,
374  const Column& c6,
375  const Column& c7,
376  const Column& c8,
377  const Column& c9) const;
378  //@}
379 
380  /**
381  // Convenience method which returns a collection of column
382  // indices using a vector of column names as an argument.
383  */
384  std::vector<unsigned long> columnIndices(
385  const std::vector<std::string>& colNames) const;
386 
387  /** Prototype needed for I/O */
388  virtual gs::ClassId classId() const = 0;
389 
390  /**
391  // Comparison for equality. Do not override in derived classes
392  // (override "isEqual" method instead)
393  */
394  inline bool operator==(const AbsNtuple& r) const
395  {return (typeid(*this) == typeid(r)) && this->isEqual(r);}
396 
397  /** Logical negation of operator== */
398  inline bool operator!=(const AbsNtuple& r) const
399  {return !(*this == r);}
400 
401  protected:
402  /**
403  // Comparison for equality to be overridden by the derived
404  // classes. Don't forget to call "isEqual" method of the
405  // base class.
406  */
407  virtual bool isEqual(const AbsNtuple& r) const;
408 
409  private:
410  std::vector<std::string> colNames_;
411  std::string title_;
412 
413  // Generic row-by-row appender which converts T2 elements into T. Before
414  // using "AppendNTuple", check compatibility of the number of columns.
415  template<int, class T2>
416  struct AppendNTuple
417  {
418  static inline void append(AbsNtuple* nt,
419  const AbsNtuple<T2>& other)
420  {
421  const unsigned long nRows = other.nRows();
422  if (nRows)
423  {
424  // We need to create buffers carefully, so that this code
425  // can work for objects without default constructors
426  const unsigned long nCols = other.nColumns();
427  std::vector<T2> bufVec;
428  bufVec.reserve(nCols);
429  std::vector<T> myBufVec;
430  myBufVec.reserve(nCols);
431  for (unsigned long col=0; col<nCols; ++col)
432  bufVec.push_back(other(0UL, col)); // populate the T2 buffer with copies of row 0
433  for (unsigned long col=0; col<nCols; ++col)
434  myBufVec.emplace_back(bufVec[col]); // construct each T from the matching T2 value
435  T2* buf = &bufVec[0];
436  T* myBuf = &myBufVec[0];
437  nt->fill(myBuf, nCols); // append row 0
438  for (unsigned long row=1UL; row<nRows; ++row)
439  {
440  other.rowContents(row, buf, nCols); // both buffers are reused for the remaining rows
441  for (unsigned long col=0; col<nCols; ++col)
442  myBuf[col] = T(buf[col]); // convert T2 to T
443  nt->fill(myBuf, nCols);
444  }
445  }
446  }
447  };
448 
449  // Faster version of "AppendNTuple" which will be called
450  // when T and T2 types are the same (no per-element conversion)
451  template<class T2>
452  struct AppendNTuple<1, T2>
453  {
454  static inline void append(AbsNtuple* nt,
455  const AbsNtuple<T2>& other)
456  {
457  const unsigned long nRows = other.nRows();
458  if (nRows)
459  {
460  const unsigned long nCols = other.nColumns();
461  std::vector<T2> bufVec;
462  bufVec.reserve(nCols);
463  for (unsigned long col=0; col<nCols; ++col)
464  bufVec.push_back(other(0UL, col)); // build the buffer from row 0 by copy, no default constructor used
465  T2* buf = &bufVec[0];
466  nt->fill(buf, nCols); // append row 0
467  for (unsigned long row=1UL; row<nRows; ++row)
468  {
469  other.rowContents(row, buf, nCols); // the same buffer is reused for every later row
470  nt->fill(buf, nCols);
471  }
472  }
473  }
474  };
475 
476 #ifdef SWIG // the wrappers below are declared only when the SWIG macro is defined
477  public:
478  template <class Accumulator>
479  inline unsigned long cutCycleOverRows( // conditionalCycleOverRows with an NtRectangularCut<T> filter
480  const NtRectangularCut<T>& f, Accumulator& acc) const
481  {
482  return conditionalCycleOverRows(f, acc);
483  }
484 
485  inline void append2(const AbsNtuple& other) // forwards to append() for an ntuple with the same T
486  {
487  append(other);
488  }
489 #endif // SWIG
490  };
491 
492  //@{
493  /**
494  // Function for dumping ntuples into text files, one row per line.
495  // By default, column values will be separated by a single white
496  // space. If "insertCommasBetweenValues" is "true" then column
497  // values will be separated by ", ". Only the data is dumped, not
498  // the info about the ntuple structure.
499  //
500  // This function will only work with T objects that have default
501  // constructors. "true" is returned on success, "false" on failure.
502  */
503  template <typename T>
504  bool dumpNtupleAsText(const AbsNtuple<T>& ntuple,
505  std::ostream& asciiStream,
506  bool insertCommasBetweenValues=false,
507  unsigned long firstRowToDump=0,
508  unsigned long maxRowsToDump=ULONG_MAX);
509  template <typename T>
510  bool dumpNtupleAsTextFile(const AbsNtuple<T>& ntuple,
511  const std::string& filename,
512  bool insertCommasBetweenValues=false,
513  unsigned long firstRowToDump=0,
514  unsigned long maxRowsToDump=ULONG_MAX);
515  //@}
516 
517  //@{
518  /**
519  // Function for filling ntuples from text files, one row per line.
520  // Will work with T objects that have default constructors.
521  // "true" is returned on success, "false" on failure.
522  //
523  // There may be more columns (but not fewer) in the file than
524  // in the ntuple. In this case extra columns are ignored.
525  //
526  // Empty lines, lines which consist of pure white space, and lines
527  // which start with an arbitrary amount of white space (including
528  // none) followed by '#' are ignored (considered comments).
529  */
530  template <typename T>
531  bool fillNtupleFromText(std::istream& asciiStream,
532  AbsNtuple<T>* ntuple,
533  bool hasCommasBetweenValues=false,
534  unsigned long maxRowsToFill=ULONG_MAX);
535  template <typename T>
536  bool fillNtupleFromTextFile(const std::string& filename,
537  AbsNtuple<T>* ntuple,
538  bool hasCommasBetweenValues=false,
539  unsigned long maxRowsToFill=ULONG_MAX);
540  //@}
541 
542  //@{
543  /**
544  // Convenience function for creating vectors of std::string
545  // using variable number of arguments (from 1 to 10 here)
546  */
547  std::vector<std::string> ntupleColumns(const char* v0);
548  std::vector<std::string> ntupleColumns(const char* v0, const char* v1);
549  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
550  const char* v2);
551  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
552  const char* v2, const char* v3);
553  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
554  const char* v2, const char* v3,
555  const char* v4);
556  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
557  const char* v2, const char* v3,
558  const char* v4, const char* v5);
559  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
560  const char* v2, const char* v3,
561  const char* v4, const char* v5,
562  const char* v6);
563  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
564  const char* v2, const char* v3,
565  const char* v4, const char* v5,
566  const char* v6, const char* v7);
567  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
568  const char* v2, const char* v3,
569  const char* v4, const char* v5,
570  const char* v6, const char* v7,
571  const char* v8);
572  std::vector<std::string> ntupleColumns(const char* v0, const char* v1,
573  const char* v2, const char* v3,
574  const char* v4, const char* v5,
575  const char* v6, const char* v7,
576  const char* v8, const char* v9);
577  std::vector<std::string> ntupleColumns(const char** names, unsigned len);
578  //@}
579 
580  /** Generate column names "c0", "c1", ..., "cM", where M = ncols - 1 */
581  std::vector<std::string> simpleColumnNames(unsigned ncols);
582 }
583 
584 #include "npstat/stat/Column.icc"
585 #include "npstat/stat/AbsNtuple.icc"
586 
587 #endif // NPSTAT_ABSNTUPLE_HH_
Address ntuple columns by name or by number.
Rectangular cuts for homogeneous ntuples.
Definition: AbsNtuple.hh:198
Definition: AbsNtuple.hh:221
Definition: AbsNtuple.hh:39
unsigned long validColumn(const char *columnName) const
std::vector< unsigned long > columnIndices(const Column &c0) const
const std::string & columnName(const unsigned long i) const
Definition: AbsNtuple.hh:73
virtual void rowContents(unsigned long row, T *buf, unsigned long lenBuf) const =0
const std::vector< std::string > & columnNames() const
Definition: AbsNtuple.hh:77
unsigned long conditionalRowCount(const Filter &f) const
virtual void setTitle(const char *newtitle)
Definition: AbsNtuple.hh:65
void append(const AbsNtuple< T2 > &another)
virtual bool setColumnName(unsigned long i, const char *newname)
unsigned long columnNumber(const char *columnName) const
virtual gs::ClassId classId() const =0
row_iterator row_begin(unsigned long rowNumber) const
T elementAt(unsigned long r, const Column &c) const
bool operator!=(const AbsNtuple &r) const
Definition: AbsNtuple.hh:398
virtual void fill(const T *values, unsigned long lenValues)=0
virtual void clear()=0
double weightedConditionalRowCount(const Filter &f, const WeightCalc &wcalc) const
void weightedCycleOverRows(Accumulator &acc, const WeightCalc &wcalc, bool skipZeroWeights=false) const
bool operator==(const AbsNtuple &r) const
Definition: AbsNtuple.hh:394
unsigned long length() const
Definition: AbsNtuple.hh:107
virtual unsigned long nRows() const =0
T element(unsigned long r, const Column &c) const
virtual T operator()(unsigned long r, unsigned long c) const =0
virtual T at(unsigned long r, unsigned long c) const =0
unsigned long conditionalCycleOverRows(const Filter &f, Accumulator &acc) const
virtual bool isEqual(const AbsNtuple &r) const
unsigned long nColumns() const
Definition: AbsNtuple.hh:69
AbsNtuple(const std::vector< std::string > &columnNames, const char *ntTitle)
Definition: AbsNtuple.hh:47
virtual void fill(const T &v0)=0
double weightedConditionalCycleOverRows(const Filter &f, Accumulator &acc, const WeightCalc &wcalc, bool skipZeroWeights=false) const
std::vector< unsigned long > columnIndices(const std::vector< std::string > &colNames) const
void cycleOverRows(Accumulator &acc) const
const std::string & title() const
Definition: AbsNtuple.hh:62
Definition: Column.hh:30
Definition: AbsArrayProjector.hh:14
std::vector< std::string > simpleColumnNames(unsigned ncols)
bool fillNtupleFromText(std::istream &asciiStream, AbsNtuple< T > *ntuple, bool hasCommasBetweenValues=false, unsigned long maxRowsToFill=ULONG_MAX)
bool dumpNtupleAsText(const AbsNtuple< T > &ntuple, std::ostream &asciiStream, bool insertCommasBetweenValues=false, unsigned long firstRowToDump=0, unsigned long maxRowsToDump=ULONG_MAX)
std::vector< std::string > ntupleColumns(const char *v0)