Line data Source code
1 : /*************************************************************************
2 : *
3 : * Project
4 : * _____ _____ __ __ _____
5 : * / ____| __ \| \/ | __ \
6 : * ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
7 : * / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
8 : *| (_) | |_) | __/ | | | |__| | | | | | | |
9 : * \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
10 : * | |
11 : * |_|
12 : *
13 : * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
14 : *
15 : * This software is licensed as described in the file LICENSE, which
16 : * you should have received as part of this distribution. The terms
17 : * among other details are referenced in the official documentation
18 : * seen here : https://akielaries.github.io/openGPMP/ along with
19 : * important files seen in this project.
20 : *
21 : * You may opt to use, copy, modify, merge, publish, distribute
22 : * and/or sell copies of the Software, and permit persons to whom
23 : * the Software is furnished to do so, under the terms of the
24 : * LICENSE file. As this is an Open Source effort, all implementations
25 : * must be of the same methodology.
26 : *
27 : *
28 : *
29 : * This software is distributed on an AS IS basis, WITHOUT
30 : * WARRANTY OF ANY KIND, either express or implied.
31 : *
32 : ************************************************************************/
33 :
34 : #include <algorithm>
35 : #include <cmath>
36 : #include <limits>
37 : #include <math.h>
38 : #include <numeric>
39 : #include <openGPMP/stats/describe.hpp>
40 : #include <openGPMP/stats/probdist.hpp>
41 : #include <random>
42 : #include <vector>
43 :
44 : float my_logf(float);
45 :
46 : /* compute inverse error functions with maximum error of 2.35793 ulp */
47 2 : float erfinv(float a) {
48 : float p, r, t;
49 2 : t = fmaf(a, 0.0f - a, 1.0f);
50 2 : t = my_logf(t);
51 2 : if (fabsf(t) > 6.125f) { // maximum ulp error = 2.35793
52 0 : p = 3.03697567e-10f; // 0x1.4deb44p-32
53 0 : p = fmaf(p, t, 2.93243101e-8f); // 0x1.f7c9aep-26
54 0 : p = fmaf(p, t, 1.22150334e-6f); // 0x1.47e512p-20
55 0 : p = fmaf(p, t, 2.84108955e-5f); // 0x1.dca7dep-16
56 0 : p = fmaf(p, t, 3.93552968e-4f); // 0x1.9cab92p-12
57 0 : p = fmaf(p, t, 3.02698812e-3f); // 0x1.8cc0dep-9
58 0 : p = fmaf(p, t, 4.83185798e-3f); // 0x1.3ca920p-8
59 0 : p = fmaf(p, t, -2.64646143e-1f); // -0x1.0eff66p-2
60 0 : p = fmaf(p, t, 8.40016484e-1f); // 0x1.ae16a4p-1
61 : } else { // maximum ulp error = 2.35002
62 2 : p = 5.43877832e-9f; // 0x1.75c000p-28
63 2 : p = fmaf(p, t, 1.43285448e-7f); // 0x1.33b402p-23
64 2 : p = fmaf(p, t, 1.22774793e-6f); // 0x1.499232p-20
65 2 : p = fmaf(p, t, 1.12963626e-7f); // 0x1.e52cd2p-24
66 2 : p = fmaf(p, t, -5.61530760e-5f); // -0x1.d70bd0p-15
67 2 : p = fmaf(p, t, -1.47697632e-4f); // -0x1.35be90p-13
68 2 : p = fmaf(p, t, 2.31468678e-3f); // 0x1.2f6400p-9
69 2 : p = fmaf(p, t, 1.15392581e-2f); // 0x1.7a1e50p-7
70 2 : p = fmaf(p, t, -2.32015476e-1f); // -0x1.db2aeep-3
71 2 : p = fmaf(p, t, 8.86226892e-1f); // 0x1.c5bf88p-1
72 : }
73 2 : r = a * p;
74 2 : return r;
75 : }
76 :
77 : /* compute natural logarithm with a maximum error of 0.85089 ulp */
78 2 : float my_logf(float a) {
79 : float i, m, r, s, t;
80 : int e;
81 :
82 2 : m = frexpf(a, &e);
83 2 : if (m < 0.666666667f) { // 0x1.555556p-1
84 1 : m = m + m;
85 1 : e = e - 1;
86 : }
87 2 : i = (float)e;
88 : /* m in [2/3, 4/3] */
89 2 : m = m - 1.0f;
90 2 : s = m * m;
91 : /* Compute log1p(m) for m in [-1/3, 1/3] */
92 2 : r = -0.130310059f; // -0x1.0ae000p-3
93 2 : t = 0.140869141f; // 0x1.208000p-3
94 2 : r = fmaf(r, s, -0.121484190f); // -0x1.f19968p-4
95 2 : t = fmaf(t, s, 0.139814854f); // 0x1.1e5740p-3
96 2 : r = fmaf(r, s, -0.166846052f); // -0x1.55b362p-3
97 2 : t = fmaf(t, s, 0.200120345f); // 0x1.99d8b2p-3
98 2 : r = fmaf(r, s, -0.249996200f); // -0x1.fffe02p-3
99 2 : r = fmaf(t, m, r);
100 2 : r = fmaf(r, m, 0.333331972f); // 0x1.5554fap-2
101 2 : r = fmaf(r, m, -0.500000000f); // -0x1.000000p-1
102 2 : r = fmaf(r, s, m);
103 2 : r = fmaf(i, 0.693147182f, r); // 0x1.62e430p-1 // log(2)
104 2 : if (!((a > 0.0f) && (a <= 3.40282346e+38f))) { // 0x1.fffffep+127
105 0 : r = a + a; // silence NaNs if necessary
106 0 : if (a < 0.0f)
107 0 : r = (0.0f / 0.0f); // NaN
108 : // if (a == 0.0f)
109 0 : if (fabs(a - 0.0f) < std::numeric_limits<double>::epsilon()) {
110 0 : r = (-1.0f / 0.0f); // -Inf
111 : }
112 : }
113 2 : return r;
114 : }
115 :
116 3 : double gpmp::stats::ProbDist::quantile_dist(double probability) {
117 3 : if (probability <= 0.0 || probability >= 1.0) {
118 1 : return 0.0; // Invalid input, return 0
119 : }
120 :
121 : // Using erfinv for older C++ standards
122 2 : double z = std::sqrt(2.0) * erfinv(2.0 * probability - 1.0);
123 :
124 2 : return z;
125 : }
126 :
127 1 : double gpmp::stats::ProbDist::normal_PDF(double x, double mean, double stddev) {
128 : // Implement the probability density function (PDF) for the normal
129 : // distribution You can use standard libraries or existing implementations
130 : // for this calculation Example using C++ standard library:
131 1 : double exponent = -0.5 * std::pow((x - mean) / stddev, 2.0);
132 1 : return (1.0 / (stddev * std::sqrt(2.0 * M_PI))) * std::exp(exponent);
133 : }
134 :
135 1 : double gpmp::stats::ProbDist::normal_CDF(double x, double mean, double stddev) {
136 : // Implement the cumulative distribution function (CDF) for the normal
137 : // distribution You can use standard libraries or existing implementations
138 : // for this calculation Example using C++ standard library:
139 1 : return 0.5 * (1.0 + std::erf((x - mean) / (stddev * std::sqrt(2.0))));
140 : }
141 :
142 1 : double gpmp::stats::ProbDist::uniform_CDF(size_t k, size_t n) {
143 1 : if (k == 0 || k > n) {
144 0 : return 0.0; // Invalid input, return 0
145 : }
146 :
147 1 : return static_cast<double>(k) / (n + 1);
148 : }
149 :
150 0 : double gpmp::stats::ProbDist::exp_PDF(double x, size_t k, double lambda) {
151 : // Check if k is valid (k must be 1 for exponential distribution)
152 0 : if (k != 1) {
153 : // Return 0 if k is not 1, as exponential distribution is only defined
154 : // for k = 1
155 0 : return 0.0;
156 : }
157 :
158 : // Check if lambda is non-positive
159 0 : if (lambda <= 0) {
160 : // Return 0 if lambda is non-positive
161 0 : return 0.0;
162 : }
163 :
164 : // Calculate the exponential PDF
165 0 : return (k / lambda) * exp(-k * x);
166 : }
167 :
168 1 : double gpmp::stats::ProbDist::emp_CDF(const std::vector<double> &data,
169 : double x) {
170 1 : size_t count = 0;
171 6 : for (const auto &value : data) {
172 5 : if (value <= x) {
173 3 : count++;
174 : }
175 : }
176 :
177 1 : return static_cast<double>(count) / data.size();
178 : }
179 :
180 1 : double gpmp::stats::ProbDist::emp_PMF(const std::vector<double> &data,
181 : double x) {
182 1 : size_t count = std::count(data.begin(), data.end(), x);
183 1 : return static_cast<double>(count) / data.size();
184 : }
185 :
186 1 : double gpmp::stats::ProbDist::inverse_emp_CDF(const std::vector<double> &data,
187 : double p) {
188 1 : if (data.empty() || p < 0.0 || p > 1.0) {
189 0 : return 0.0; // Invalid input, return 0
190 : }
191 :
192 1 : std::vector<double> sortedData = data;
193 1 : std::sort(sortedData.begin(), sortedData.end());
194 :
195 1 : size_t index = static_cast<size_t>(p * (data.size() - 1));
196 1 : return sortedData[index];
197 1 : }
198 :
199 2 : double gpmp::stats::ProbDist::mle(const std::vector<double> &data) {
200 2 : if (data.empty()) {
201 0 : return 0.0; // Invalid input, return 0
202 : }
203 :
204 2 : double sum = std::accumulate(data.begin(), data.end(), 0.0);
205 2 : return sum / data.size();
206 : }
207 :
208 1 : double gpmp::stats::ProbDist::mom(const std::vector<double> &data) {
209 1 : if (data.empty()) {
210 0 : return 0.0; // Invalid input, return 0
211 : }
212 : gpmp::stats::Describe desc;
213 :
214 1 : double mean = desc.mean_arith(data);
215 1 : double variance = desc.variance(data, mean);
216 :
217 1 : return mean - variance / 2.0;
218 : }
219 :
220 1 : double gpmp::stats::ProbDist::mle_est(const std::vector<double> &data) {
221 : // This is a placeholder, you can replace it with a specific M-estimation
222 : // method
223 1 : return mle(data);
224 : }
225 :
226 1 : double gpmp::stats::ProbDist::mumv(const std::vector<double> &data) {
227 1 : if (data.empty()) {
228 0 : return 0.0; // Invalid input, return 0
229 : }
230 :
231 1 : double sum = std::accumulate(data.begin(), data.end(), 0.0);
232 1 : return sum / data.size();
233 : }
234 :
235 2 : double gpmp::stats::ProbDist::median_uniased(const std::vector<double> &data) {
236 2 : if (data.empty()) {
237 0 : return 0.0; // Invalid input, return 0
238 : }
239 :
240 2 : std::vector<double> sortedData = data;
241 2 : std::sort(sortedData.begin(), sortedData.end());
242 :
243 2 : size_t size = sortedData.size();
244 2 : if (size % 2 == 0) {
245 1 : return (sortedData[size / 2 - 1] + sortedData[size / 2]) / 2.0;
246 : } else {
247 1 : return sortedData[size / 2];
248 : }
249 2 : }
250 :
251 : // Interval Estimation
252 : std::pair<double, double>
253 1 : gpmp::stats::ProbDist::ConfidenceInterval(const std::vector<double> &data,
254 : double alpha) {
255 1 : if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
256 0 : return {0.0, 0.0}; // Invalid input, return an empty interval
257 : }
258 :
259 1 : size_t n = data.size();
260 : gpmp::stats::Describe desc;
261 1 : double mean = desc.mean_arith(data);
262 1 : double stddev = desc.stdev(data, mean);
263 :
264 : // Assuming a normal distribution for simplicity
265 1 : double z = quantile_dist(1 - alpha / 2.0);
266 1 : double margin = z * stddev / std::sqrt(static_cast<double>(n));
267 :
268 1 : return {mean - margin, mean + margin};
269 : }
270 :
271 0 : double gpmp::stats::ProbDist::Pivot(
272 : const std::vector<double> &data,
273 : double pivotFunction(const std::vector<double> &)) {
274 0 : if (data.empty()) {
275 0 : return 0.0; // Invalid input, return 0
276 : }
277 :
278 0 : return pivotFunction(data);
279 : }
280 :
281 : // Example of a pivot function for Confidence Interval
282 0 : double gpmp::stats::ProbDist::PivotFunctionForConfidenceInterval(
283 : const std::vector<double> &data) {
284 0 : size_t n = data.size();
285 :
286 : gpmp::stats::Describe desc;
287 0 : double mean = desc.mean_arith(data);
288 0 : double stddev = desc.stdev(data, mean);
289 :
290 0 : return mean + 2 * stddev / std::sqrt(static_cast<double>(n));
291 : }
292 :
293 : std::pair<double, double>
294 1 : gpmp::stats::ProbDist::LikelihoodInterval(const std::vector<double> &data,
295 : double alpha) {
296 : // Example implementation, this needs to be adapted based on the specific
297 : // likelihood function
298 1 : if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
299 0 : return {0.0, 0.0}; // Invalid input, return an empty interval
300 : }
301 :
302 : // Placeholder, implement likelihood function and find confidence bounds
303 1 : double lowerBound = 0.0;
304 1 : double upperBound = 1.0;
305 :
306 1 : return {lowerBound, upperBound};
307 : }
308 :
309 : std::pair<double, double>
310 1 : gpmp::stats::ProbDist::PredictionInterval(const std::vector<double> &data,
311 : double alpha) {
312 1 : if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
313 0 : return {0.0, 0.0}; // Invalid input, return an empty interval
314 : }
315 :
316 : // Placeholder, implement prediction interval calculation
317 1 : double lowerBound = 0.0;
318 1 : double upperBound = 1.0;
319 :
320 1 : return {lowerBound, upperBound};
321 : }
322 :
323 : std::pair<double, double>
324 1 : gpmp::stats::ProbDist::ToleranceInterval(const std::vector<double> &data,
325 : double alpha) {
326 1 : if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
327 0 : return {0.0, 0.0}; // Invalid input, return an empty interval
328 : }
329 :
330 : // Placeholder, implement tolerance interval calculation
331 1 : double lowerBound = 0.0;
332 1 : double upperBound = 1.0;
333 :
334 1 : return {lowerBound, upperBound};
335 : }
|