openGPMP
Open Source Mathematics Package
probdist.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  *
3  * Project
4  * _____ _____ __ __ _____
5  * / ____| __ \| \/ | __ \
6  * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7  * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8  *| (_) | |_) | __/ | | | |__| | | | | | | |
9  * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10  * | |
11  * |_|
12  *
13  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14  *
15  * This software is licensed as described in the file LICENSE, which
16  * you should have received as part of this distribution. The terms
17  * among other details are referenced in the official documentation
18  * seen here : https://akielaries.github.io/openGPMP/ along with
19  * important files seen in this project.
20  *
21  * You may opt to use, copy, modify, merge, publish, distribute
22  * and/or sell copies of the Software, and permit persons to whom
23  * the Software is furnished to do so, under the terms of the
24  * LICENSE file. As this is an Open Source effort, all implementations
25  * must be of the same methodology.
26  *
27  *
28  *
29  * This software is distributed on an AS IS basis, WITHOUT
30  * WARRANTY OF ANY KIND, either express or implied.
31  *
32  ************************************************************************/
33 
34 #include <algorithm>
35 #include <cmath>
36 #include <limits>
37 #include <math.h>
38 #include <numeric>
41 #include <random>
42 #include <vector>
43 
44 float my_logf(float);
45 
46 /* compute inverse error functions with maximum error of 2.35793 ulp */
47 float erfinv(float a) {
48  float p, r, t;
49  t = fmaf(a, 0.0f - a, 1.0f);
50  t = my_logf(t);
51  if (fabsf(t) > 6.125f) { // maximum ulp error = 2.35793
52  p = 3.03697567e-10f; // 0x1.4deb44p-32
53  p = fmaf(p, t, 2.93243101e-8f); // 0x1.f7c9aep-26
54  p = fmaf(p, t, 1.22150334e-6f); // 0x1.47e512p-20
55  p = fmaf(p, t, 2.84108955e-5f); // 0x1.dca7dep-16
56  p = fmaf(p, t, 3.93552968e-4f); // 0x1.9cab92p-12
57  p = fmaf(p, t, 3.02698812e-3f); // 0x1.8cc0dep-9
58  p = fmaf(p, t, 4.83185798e-3f); // 0x1.3ca920p-8
59  p = fmaf(p, t, -2.64646143e-1f); // -0x1.0eff66p-2
60  p = fmaf(p, t, 8.40016484e-1f); // 0x1.ae16a4p-1
61  } else { // maximum ulp error = 2.35002
62  p = 5.43877832e-9f; // 0x1.75c000p-28
63  p = fmaf(p, t, 1.43285448e-7f); // 0x1.33b402p-23
64  p = fmaf(p, t, 1.22774793e-6f); // 0x1.499232p-20
65  p = fmaf(p, t, 1.12963626e-7f); // 0x1.e52cd2p-24
66  p = fmaf(p, t, -5.61530760e-5f); // -0x1.d70bd0p-15
67  p = fmaf(p, t, -1.47697632e-4f); // -0x1.35be90p-13
68  p = fmaf(p, t, 2.31468678e-3f); // 0x1.2f6400p-9
69  p = fmaf(p, t, 1.15392581e-2f); // 0x1.7a1e50p-7
70  p = fmaf(p, t, -2.32015476e-1f); // -0x1.db2aeep-3
71  p = fmaf(p, t, 8.86226892e-1f); // 0x1.c5bf88p-1
72  }
73  r = a * p;
74  return r;
75 }
76 
77 /* compute natural logarithm with a maximum error of 0.85089 ulp */
78 float my_logf(float a) {
79  float i, m, r, s, t;
80  int e;
81 
82  m = frexpf(a, &e);
83  if (m < 0.666666667f) { // 0x1.555556p-1
84  m = m + m;
85  e = e - 1;
86  }
87  i = (float)e;
88  /* m in [2/3, 4/3] */
89  m = m - 1.0f;
90  s = m * m;
91  /* Compute log1p(m) for m in [-1/3, 1/3] */
92  r = -0.130310059f; // -0x1.0ae000p-3
93  t = 0.140869141f; // 0x1.208000p-3
94  r = fmaf(r, s, -0.121484190f); // -0x1.f19968p-4
95  t = fmaf(t, s, 0.139814854f); // 0x1.1e5740p-3
96  r = fmaf(r, s, -0.166846052f); // -0x1.55b362p-3
97  t = fmaf(t, s, 0.200120345f); // 0x1.99d8b2p-3
98  r = fmaf(r, s, -0.249996200f); // -0x1.fffe02p-3
99  r = fmaf(t, m, r);
100  r = fmaf(r, m, 0.333331972f); // 0x1.5554fap-2
101  r = fmaf(r, m, -0.500000000f); // -0x1.000000p-1
102  r = fmaf(r, s, m);
103  r = fmaf(i, 0.693147182f, r); // 0x1.62e430p-1 // log(2)
104  if (!((a > 0.0f) && (a <= 3.40282346e+38f))) { // 0x1.fffffep+127
105  r = a + a; // silence NaNs if necessary
106  if (a < 0.0f)
107  r = (0.0f / 0.0f); // NaN
108  // if (a == 0.0f)
109  if (fabs(a - 0.0f) < std::numeric_limits<double>::epsilon()) {
110  r = (-1.0f / 0.0f); // -Inf
111  }
112  }
113  return r;
114 }
115 
116 double gpmp::stats::ProbDist::quantile_dist(double probability) {
117  if (probability <= 0.0 || probability >= 1.0) {
118  return 0.0; // Invalid input, return 0
119  }
120 
121  // Using erfinv for older C++ standards
122  double z = std::sqrt(2.0) * erfinv(2.0 * probability - 1.0);
123 
124  return z;
125 }
126 
127 double gpmp::stats::ProbDist::normal_PDF(double x, double mean, double stddev) {
128  // Implement the probability density function (PDF) for the normal
129  // distribution You can use standard libraries or existing implementations
130  // for this calculation Example using C++ standard library:
131  double exponent = -0.5 * std::pow((x - mean) / stddev, 2.0);
132  return (1.0 / (stddev * std::sqrt(2.0 * M_PI))) * std::exp(exponent);
133 }
134 
135 double gpmp::stats::ProbDist::normal_CDF(double x, double mean, double stddev) {
136  // Implement the cumulative distribution function (CDF) for the normal
137  // distribution You can use standard libraries or existing implementations
138  // for this calculation Example using C++ standard library:
139  return 0.5 * (1.0 + std::erf((x - mean) / (stddev * std::sqrt(2.0))));
140 }
141 
142 double gpmp::stats::ProbDist::uniform_CDF(size_t k, size_t n) {
143  if (k == 0 || k > n) {
144  return 0.0; // Invalid input, return 0
145  }
146 
147  return static_cast<double>(k) / (n + 1);
148 }
149 
150 double gpmp::stats::ProbDist::exp_PDF(double x, size_t k, double lambda) {
151  // Check if k is valid (k must be 1 for exponential distribution)
152  if (k != 1) {
153  // Return 0 if k is not 1, as exponential distribution is only defined
154  // for k = 1
155  return 0.0;
156  }
157 
158  // Check if lambda is non-positive
159  if (lambda <= 0) {
160  // Return 0 if lambda is non-positive
161  return 0.0;
162  }
163 
164  // Calculate the exponential PDF
165  return (k / lambda) * exp(-k * x);
166 }
167 
168 double gpmp::stats::ProbDist::emp_CDF(const std::vector<double> &data,
169  double x) {
170  size_t count = 0;
171  for (const auto &value : data) {
172  if (value <= x) {
173  count++;
174  }
175  }
176 
177  return static_cast<double>(count) / data.size();
178 }
179 
180 double gpmp::stats::ProbDist::emp_PMF(const std::vector<double> &data,
181  double x) {
182  size_t count = std::count(data.begin(), data.end(), x);
183  return static_cast<double>(count) / data.size();
184 }
185 
186 double gpmp::stats::ProbDist::inverse_emp_CDF(const std::vector<double> &data,
187  double p) {
188  if (data.empty() || p < 0.0 || p > 1.0) {
189  return 0.0; // Invalid input, return 0
190  }
191 
192  std::vector<double> sortedData = data;
193  std::sort(sortedData.begin(), sortedData.end());
194 
195  size_t index = static_cast<size_t>(p * (data.size() - 1));
196  return sortedData[index];
197 }
198 
199 double gpmp::stats::ProbDist::mle(const std::vector<double> &data) {
200  if (data.empty()) {
201  return 0.0; // Invalid input, return 0
202  }
203 
204  double sum = std::accumulate(data.begin(), data.end(), 0.0);
205  return sum / data.size();
206 }
207 
208 double gpmp::stats::ProbDist::mom(const std::vector<double> &data) {
209  if (data.empty()) {
210  return 0.0; // Invalid input, return 0
211  }
213 
214  double mean = desc.mean_arith(data);
215  double variance = desc.variance(data, mean);
216 
217  return mean - variance / 2.0;
218 }
219 
220 double gpmp::stats::ProbDist::mle_est(const std::vector<double> &data) {
221  // This is a placeholder, you can replace it with a specific M-estimation
222  // method
223  return mle(data);
224 }
225 
226 double gpmp::stats::ProbDist::mumv(const std::vector<double> &data) {
227  if (data.empty()) {
228  return 0.0; // Invalid input, return 0
229  }
230 
231  double sum = std::accumulate(data.begin(), data.end(), 0.0);
232  return sum / data.size();
233 }
234 
235 double gpmp::stats::ProbDist::median_uniased(const std::vector<double> &data) {
236  if (data.empty()) {
237  return 0.0; // Invalid input, return 0
238  }
239 
240  std::vector<double> sortedData = data;
241  std::sort(sortedData.begin(), sortedData.end());
242 
243  size_t size = sortedData.size();
244  if (size % 2 == 0) {
245  return (sortedData[size / 2 - 1] + sortedData[size / 2]) / 2.0;
246  } else {
247  return sortedData[size / 2];
248  }
249 }
250 
251 // Interval Estimation
252 std::pair<double, double>
253 gpmp::stats::ProbDist::ConfidenceInterval(const std::vector<double> &data,
254  double alpha) {
255  if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
256  return {0.0, 0.0}; // Invalid input, return an empty interval
257  }
258 
259  size_t n = data.size();
261  double mean = desc.mean_arith(data);
262  double stddev = desc.stdev(data, mean);
263 
264  // Assuming a normal distribution for simplicity
265  double z = quantile_dist(1 - alpha / 2.0);
266  double margin = z * stddev / std::sqrt(static_cast<double>(n));
267 
268  return {mean - margin, mean + margin};
269 }
270 
272  const std::vector<double> &data,
273  double pivotFunction(const std::vector<double> &)) {
274  if (data.empty()) {
275  return 0.0; // Invalid input, return 0
276  }
277 
278  return pivotFunction(data);
279 }
280 
281 // Example of a pivot function for Confidence Interval
283  const std::vector<double> &data) {
284  size_t n = data.size();
285 
287  double mean = desc.mean_arith(data);
288  double stddev = desc.stdev(data, mean);
289 
290  return mean + 2 * stddev / std::sqrt(static_cast<double>(n));
291 }
292 
293 std::pair<double, double>
294 gpmp::stats::ProbDist::LikelihoodInterval(const std::vector<double> &data,
295  double alpha) {
296  // Example implementation, this needs to be adapted based on the specific
297  // likelihood function
298  if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
299  return {0.0, 0.0}; // Invalid input, return an empty interval
300  }
301 
302  // Placeholder, implement likelihood function and find confidence bounds
303  double lowerBound = 0.0;
304  double upperBound = 1.0;
305 
306  return {lowerBound, upperBound};
307 }
308 
309 std::pair<double, double>
310 gpmp::stats::ProbDist::PredictionInterval(const std::vector<double> &data,
311  double alpha) {
312  if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
313  return {0.0, 0.0}; // Invalid input, return an empty interval
314  }
315 
316  // Placeholder, implement prediction interval calculation
317  double lowerBound = 0.0;
318  double upperBound = 1.0;
319 
320  return {lowerBound, upperBound};
321 }
322 
323 std::pair<double, double>
324 gpmp::stats::ProbDist::ToleranceInterval(const std::vector<double> &data,
325  double alpha) {
326  if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
327  return {0.0, 0.0}; // Invalid input, return an empty interval
328  }
329 
330  // Placeholder, implement tolerance interval calculation
331  double lowerBound = 0.0;
332  double upperBound = 1.0;
333 
334  return {lowerBound, upperBound};
335 }
A class providing methods for descriptive statistics.
Definition: describe.hpp:44
static double stdev(const std::vector< double > &data, double mean)
Calculates the standard deviation of a given dataset, given the mean.
Definition: describe.cpp:184
static double variance(const std::vector< double > &data, double mean)
Calculates the variance of a given dataset, given the mean.
Definition: describe.cpp:194
static double mean_arith(const std::vector< double > &data)
Calculates the arithmetic mean of a given dataset.
Definition: describe.cpp:52
double exp_PDF(double x, size_t k, double lambda)
Compute the probability density function (PDF) for the exponential distribution.
Definition: probdist.cpp:150
double normal_PDF(double x, double mean, double stddev)
Compute the probability density function (PDF) for the normal distribution.
Definition: probdist.cpp:127
double mle(const std::vector< double > &data)
Compute the Maximum Likelihood Estimate (MLE) for the mean of a dataset.
Definition: probdist.cpp:199
double mle_est(const std::vector< double > &data)
Placeholder function for M-estimation.
Definition: probdist.cpp:220
double emp_CDF(const std::vector< double > &data, double x)
Compute the empirical cumulative distribution function (CDF) for a given value.
Definition: probdist.cpp:168
std::pair< double, double > LikelihoodInterval(const std::vector< double > &data, double alpha)
Compute the likelihood interval for a given dataset and significance level.
Definition: probdist.cpp:294
double Pivot(const std::vector< double > &data, double pivotFunction(const std::vector< double > &))
Compute the value of a pivot function for interval estimation.
Definition: probdist.cpp:271
std::pair< double, double > PredictionInterval(const std::vector< double > &data, double alpha)
Compute the prediction interval for a given dataset and significance level.
Definition: probdist.cpp:310
double normal_CDF(double x, double mean, double stddev)
Compute the cumulative distribution function (CDF) for the normal distribution.
Definition: probdist.cpp:135
double uniform_CDF(size_t k, size_t n)
Compute the cumulative distribution function (CDF) for the uniform distribution.
Definition: probdist.cpp:142
double quantile_dist(double probability)
Compute the quantile of the standard normal distribution.
Definition: probdist.cpp:116
double median_uniased(const std::vector< double > &data)
Compute the median-unbiased estimate for the median of a dataset.
Definition: probdist.cpp:235
double inverse_emp_CDF(const std::vector< double > &data, double p)
Compute the inverse of the empirical cumulative distribution function (CDF) for a given probability.
Definition: probdist.cpp:186
double emp_PMF(const std::vector< double > &data, double x)
Compute the empirical probability mass function (PMF) for a given value.
Definition: probdist.cpp:180
double mom(const std::vector< double > &data)
Compute the method of moments (MOM) estimate for the mean of a dataset.
Definition: probdist.cpp:208
std::pair< double, double > ConfidenceInterval(const std::vector< double > &data, double alpha)
Compute the confidence interval for the mean of a dataset.
Definition: probdist.cpp:253
std::pair< double, double > ToleranceInterval(const std::vector< double > &data, double alpha)
Compute the tolerance interval for a given dataset and significance level.
Definition: probdist.cpp:324
double mumv(const std::vector< double > &data)
Compute the Mean-Unbiased Minimum-Variance (MUMV) estimate for the mean of a dataset.
Definition: probdist.cpp:226
double PivotFunctionForConfidenceInterval(const std::vector< double > &data)
Example pivot function for computing a confidence interval.
Definition: probdist.cpp:282
float erfinv(float a)
Definition: probdist.cpp:47
float my_logf(float)
Definition: probdist.cpp:78