npstat is hosted by Hepforge, IPPP Durham
NPStat  5.10.0
sampleDistro1DWithWeight.hh
Go to the documentation of this file.
1 #ifndef NPSTAT_SAMPLEDISTRO1DWITHWEIGHT_HH_
2 #define NPSTAT_SAMPLEDISTRO1DWITHWEIGHT_HH_
3 
4 /*!
5 // \file sampleDistro1DWithWeight.hh
6 //
7 // \brief Acceptance-rejection sampling from a density with compensatory
8 // weights. Does not change the average but affects the variance.
9 //
10 // Author: I. Volobouev
11 //
12 // June 2022
13 */
14 
15 #include <cmath>
16 #include <vector>
17 #include <utility>
18 #include <cassert>
19 #include <stdexcept>
20 #include <climits>
21 
22 #include "npstat/nm/Triple.hh"
23 #include "npstat/nm/fcnOrConst.hh"
24 
27 
28 namespace npstat {
29  enum SampleSizeInterpretation
30  {
31  SIZE_IS_N_TRIES = 0,
32  SIZE_IS_N_GENERATED,
33  SIZE_IS_KISHS
34  };
35 
36  /**
37  // AcceptanceFunction is a functor which takes a double as
38  // an argument and returns a double on the [0, 1] interval.
39  //
40  // The elements of the returned triple are:
41  //
42  // first -- sum of weights
43  //
44  // second -- the Kish's effective sample size of the generated
45  // sample
46  //
47  // third -- the efficiency of the generator (the ratio between
48  // the number of points generated and the number of
49  // points attempted)
50  //
51  // In each generated point (the vector "sample" is filled with them),
52  // the first element of the pair is the point coordinate and the
53  // second element is the point weight.
54  */
55  template <class AcceptanceFunction>
57  const AbsDistribution1D& distro,
58  const AcceptanceFunction& fcn,
59  AbsRandomGenerator& rng,
60  const double sampleSize,
61  const SampleSizeInterpretation whichSize,
62  std::vector<std::pair<double,double> >* sample)
63  {
64  if (sampleSize < 0.0) throw std::invalid_argument(
65  "In npstat::sampleDistro1DWithWeight: "
66  "sample size can not be negative");
67  if (sample)
68  sample->clear();
69  if (sampleSize > 0.0)
70  {
71  assert(sample);
72 
73  long double kishLimit = LDBL_MAX;
74  unsigned long nTriesLimit = ULONG_MAX;
75  unsigned long nGeneratedLimit = ULONG_MAX;
76  double lastTryProb = 1.0;
77  double lastGenProb = 1.0;
78 
79  switch (whichSize)
80  {
81  case SIZE_IS_N_TRIES:
82  nTriesLimit = std::floor(sampleSize);
83  if (static_cast<double>(nTriesLimit) != sampleSize)
84  {
85  lastTryProb = sampleSize - nTriesLimit;
86  ++nTriesLimit;
87  }
88  break;
89 
90  case SIZE_IS_N_GENERATED:
91  nGeneratedLimit = std::floor(sampleSize);
92  if (static_cast<double>(nGeneratedLimit) != sampleSize)
93  {
94  lastGenProb = sampleSize - nGeneratedLimit;
95  ++nGeneratedLimit;
96  }
97  break;
98 
99  case SIZE_IS_KISHS:
100  kishLimit = sampleSize;
101  break;
102 
103  default:
104  assert(!"In npstat::sampleDistro1DWithWeight: "
105  "unhandled switch case. This is a bug. Please report.");
106  }
107 
108  unsigned long nTries = 0, nAccepted = 0;
109  long double effSize = 0.0L, sumW = 0.0L, sumWSq = 0.0L;
110  double tmp;
111 
112  while (effSize < kishLimit &&
113  nAccepted < nGeneratedLimit &&
114  nTries < nTriesLimit)
115  {
116  if (nTries + 1U == nTriesLimit)
117  if (rng() > lastTryProb)
118  break;
119 
120  distro.random(rng, &tmp);
121  const double sf = fcnOrConst(fcn, tmp);
122  assert(sf >= 0.0);
123  assert(sf <= 1.0);
124  if (sf > 0.0)
125  {
126  const double r = rng();
127  if (r <= sf)
128  {
129  ++nAccepted;
130  bool accepted = true;
131  if (nAccepted == nGeneratedLimit)
132  if (rng() > lastGenProb)
133  accepted = false;
134  if (accepted)
135  {
136  const double w = 1.0/sf;
137  sample->push_back(std::pair<double,double>(tmp, w));
138  sumW += w;
139  sumWSq += w*w;
140  effSize = sumW/sumWSq*sumW;
141  }
142  }
143  }
144 
145  ++nTries;
146  }
147 
148  double eff = 0.0;
149  if (nTries)
150  eff = static_cast<double>(nAccepted)/nTries;
151  return npstat::Triple<double,double,double>(sumW, effSize, eff);
152  }
153  else
154  return npstat::Triple<double,double,double>(0.0, 0.0, 0.0);
155  }
156 }
157 
158 #endif // NPSTAT_SAMPLEDISTRO1DWITHWEIGHT_HH_
Interface definition for 1-d continuous statistical distributions.
Interface definition for pseudo- and quasi-random number generators.
A simple analogue of std::pair with three components instead of two.
Templated utilities for use in various density estimation codes, etc.
Definition: AbsArrayProjector.hh:14
npstat::Triple< double, double, double > sampleDistro1DWithWeight(const AbsDistribution1D &distro, const AcceptanceFunction &fcn, AbsRandomGenerator &rng, const double sampleSize, const SampleSizeInterpretation whichSize, std::vector< std::pair< double, double > > *sample)
Definition: sampleDistro1DWithWeight.hh:56
Definition: AbsDistribution1D.hh:31
virtual unsigned random(AbsRandomGenerator &g, double *generatedRandom) const
Definition: AbsDistribution1D.hh:68
Definition: AbsRandomGenerator.hh:27
Definition: Triple.hh:18