LCOV - lcov.info - modules/stats/probdist.cpp

LCOV - code coverage report

Current view:	top level - modules/stats - probdist.cpp (source / functions)		Hit	Total	Coverage
Test:	lcov.info	Lines:	114	153	74.5 %
Date:	2024-05-13 05:06:06	Functions:	18	21	85.7 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*************************************************************************
       2             :  *
       3             :  *  Project
       4             :  *                         _____ _____  __  __ _____
       5             :  *                        / ____|  __ \|  \/  |  __ \
       6             :  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
       7             :  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
       8             :  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
       9             :  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
      10             :  *      | |
      11             :  *      |_|
      12             :  *
      13             :  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
      14             :  *
      15             :  * This software is licensed as described in the file LICENSE, which
      16             :  * you should have received as part of this distribution. The terms
      17             :  * among other details are referenced in the official documentation
      18             :  * seen here : https://akielaries.github.io/openGPMP/ along with
      19             :  * important files seen in this project.
      20             :  *
      21             :  * You may opt to use, copy, modify, merge, publish, distribute
      22             :  * and/or sell copies of the Software, and permit persons to whom
      23             :  * the Software is furnished to do so, under the terms of the
      24             :  * LICENSE file. As this is an Open Source effort, all implementations
      25             :  * must be of the same methodology.
      26             :  *
      27             :  *
      28             :  *
      29             :  * This software is distributed on an AS IS basis, WITHOUT
      30             :  * WARRANTY OF ANY KIND, either express or implied.
      31             :  *
      32             :  ************************************************************************/
      33             : 
      34             : #include <algorithm>
      35             : #include <cmath>
      36             : #include <limits>
      37             : #include <math.h>
      38             : #include <numeric>
      39             : #include <openGPMP/stats/describe.hpp>
      40             : #include <openGPMP/stats/probdist.hpp>
      41             : #include <random>
      42             : #include <vector>
      43             : 
      44             : float my_logf(float);
      45             : 
      46             : /* compute inverse error functions with maximum error of 2.35793 ulp */
      47           2 : float erfinv(float a) {
      48             :     float p, r, t;
      49           2 :     t = fmaf(a, 0.0f - a, 1.0f);
      50           2 :     t = my_logf(t);
      51           2 :     if (fabsf(t) > 6.125f) {             // maximum ulp error = 2.35793
      52           0 :         p = 3.03697567e-10f;             //  0x1.4deb44p-32
      53           0 :         p = fmaf(p, t, 2.93243101e-8f);  //  0x1.f7c9aep-26
      54           0 :         p = fmaf(p, t, 1.22150334e-6f);  //  0x1.47e512p-20
      55           0 :         p = fmaf(p, t, 2.84108955e-5f);  //  0x1.dca7dep-16
      56           0 :         p = fmaf(p, t, 3.93552968e-4f);  //  0x1.9cab92p-12
      57           0 :         p = fmaf(p, t, 3.02698812e-3f);  //  0x1.8cc0dep-9
      58           0 :         p = fmaf(p, t, 4.83185798e-3f);  //  0x1.3ca920p-8
      59           0 :         p = fmaf(p, t, -2.64646143e-1f); // -0x1.0eff66p-2
      60           0 :         p = fmaf(p, t, 8.40016484e-1f);  //  0x1.ae16a4p-1
      61             :     } else {                             // maximum ulp error = 2.35002
      62           2 :         p = 5.43877832e-9f;              //  0x1.75c000p-28
      63           2 :         p = fmaf(p, t, 1.43285448e-7f);  //  0x1.33b402p-23
      64           2 :         p = fmaf(p, t, 1.22774793e-6f);  //  0x1.499232p-20
      65           2 :         p = fmaf(p, t, 1.12963626e-7f);  //  0x1.e52cd2p-24
      66           2 :         p = fmaf(p, t, -5.61530760e-5f); // -0x1.d70bd0p-15
      67           2 :         p = fmaf(p, t, -1.47697632e-4f); // -0x1.35be90p-13
      68           2 :         p = fmaf(p, t, 2.31468678e-3f);  //  0x1.2f6400p-9
      69           2 :         p = fmaf(p, t, 1.15392581e-2f);  //  0x1.7a1e50p-7
      70           2 :         p = fmaf(p, t, -2.32015476e-1f); // -0x1.db2aeep-3
      71           2 :         p = fmaf(p, t, 8.86226892e-1f);  //  0x1.c5bf88p-1
      72             :     }
      73           2 :     r = a * p;
      74           2 :     return r;
      75             : }
      76             : 
      77             : /* compute natural logarithm with a maximum error of 0.85089 ulp */
      78           2 : float my_logf(float a) {
      79             :     float i, m, r, s, t;
      80             :     int e;
      81             : 
      82           2 :     m = frexpf(a, &e);
      83           2 :     if (m < 0.666666667f) { // 0x1.555556p-1
      84           1 :         m = m + m;
      85           1 :         e = e - 1;
      86             :     }
      87           2 :     i = (float)e;
      88             :     /* m in [2/3, 4/3] */
      89           2 :     m = m - 1.0f;
      90           2 :     s = m * m;
      91             :     /* Compute log1p(m) for m in [-1/3, 1/3] */
      92           2 :     r = -0.130310059f;             // -0x1.0ae000p-3
      93           2 :     t = 0.140869141f;              //  0x1.208000p-3
      94           2 :     r = fmaf(r, s, -0.121484190f); // -0x1.f19968p-4
      95           2 :     t = fmaf(t, s, 0.139814854f);  //  0x1.1e5740p-3
      96           2 :     r = fmaf(r, s, -0.166846052f); // -0x1.55b362p-3
      97           2 :     t = fmaf(t, s, 0.200120345f);  //  0x1.99d8b2p-3
      98           2 :     r = fmaf(r, s, -0.249996200f); // -0x1.fffe02p-3
      99           2 :     r = fmaf(t, m, r);
     100           2 :     r = fmaf(r, m, 0.333331972f);  //  0x1.5554fap-2
     101           2 :     r = fmaf(r, m, -0.500000000f); // -0x1.000000p-1
     102           2 :     r = fmaf(r, s, m);
     103           2 :     r = fmaf(i, 0.693147182f, r);                  //  0x1.62e430p-1 // log(2)
     104           2 :     if (!((a > 0.0f) && (a <= 3.40282346e+38f))) { // 0x1.fffffep+127
     105           0 :         r = a + a;                                 // silence NaNs if necessary
     106           0 :         if (a < 0.0f)
     107           0 :             r = (0.0f / 0.0f); //  NaN
     108             :         // if (a == 0.0f)
     109           0 :         if (fabs(a - 0.0f) < std::numeric_limits<double>::epsilon()) {
     110           0 :             r = (-1.0f / 0.0f); // -Inf
     111             :         }
     112             :     }
     113           2 :     return r;
     114             : }
     115             : 
     116           3 : double gpmp::stats::ProbDist::quantile_dist(double probability) {
     117           3 :     if (probability <= 0.0 || probability >= 1.0) {
     118           1 :         return 0.0; // Invalid input, return 0
     119             :     }
     120             : 
     121             :     // Using erfinv for older C++ standards
     122           2 :     double z = std::sqrt(2.0) * erfinv(2.0 * probability - 1.0);
     123             : 
     124           2 :     return z;
     125             : }
     126             : 
     127           1 : double gpmp::stats::ProbDist::normal_PDF(double x, double mean, double stddev) {
     128             :     // Implement the probability density function (PDF) for the normal
     129             :     // distribution You can use standard libraries or existing implementations
     130             :     // for this calculation Example using C++ standard library:
     131           1 :     double exponent = -0.5 * std::pow((x - mean) / stddev, 2.0);
     132           1 :     return (1.0 / (stddev * std::sqrt(2.0 * M_PI))) * std::exp(exponent);
     133             : }
     134             : 
     135           1 : double gpmp::stats::ProbDist::normal_CDF(double x, double mean, double stddev) {
     136             :     // Implement the cumulative distribution function (CDF) for the normal
     137             :     // distribution You can use standard libraries or existing implementations
     138             :     // for this calculation Example using C++ standard library:
     139           1 :     return 0.5 * (1.0 + std::erf((x - mean) / (stddev * std::sqrt(2.0))));
     140             : }
     141             : 
     142           1 : double gpmp::stats::ProbDist::uniform_CDF(size_t k, size_t n) {
     143           1 :     if (k == 0 || k > n) {
     144           0 :         return 0.0; // Invalid input, return 0
     145             :     }
     146             : 
     147           1 :     return static_cast<double>(k) / (n + 1);
     148             : }
     149             : 
     150           0 : double gpmp::stats::ProbDist::exp_PDF(double x, size_t k, double lambda) {
     151             :     // Check if k is valid (k must be 1 for exponential distribution)
     152           0 :     if (k != 1) {
     153             :         // Return 0 if k is not 1, as exponential distribution is only defined
     154             :         // for k = 1
     155           0 :         return 0.0;
     156             :     }
     157             : 
     158             :     // Check if lambda is non-positive
     159           0 :     if (lambda <= 0) {
     160             :         // Return 0 if lambda is non-positive
     161           0 :         return 0.0;
     162             :     }
     163             : 
     164             :     // Calculate the exponential PDF
     165           0 :     return (k / lambda) * exp(-k * x);
     166             : }
     167             : 
     168           1 : double gpmp::stats::ProbDist::emp_CDF(const std::vector<double> &data,
     169             :                                       double x) {
     170           1 :     size_t count = 0;
     171           6 :     for (const auto &value : data) {
     172           5 :         if (value <= x) {
     173           3 :             count++;
     174             :         }
     175             :     }
     176             : 
     177           1 :     return static_cast<double>(count) / data.size();
     178             : }
     179             : 
     180           1 : double gpmp::stats::ProbDist::emp_PMF(const std::vector<double> &data,
     181             :                                       double x) {
     182           1 :     size_t count = std::count(data.begin(), data.end(), x);
     183           1 :     return static_cast<double>(count) / data.size();
     184             : }
     185             : 
     186           1 : double gpmp::stats::ProbDist::inverse_emp_CDF(const std::vector<double> &data,
     187             :                                               double p) {
     188           1 :     if (data.empty() || p < 0.0 || p > 1.0) {
     189           0 :         return 0.0; // Invalid input, return 0
     190             :     }
     191             : 
     192           1 :     std::vector<double> sortedData = data;
     193           1 :     std::sort(sortedData.begin(), sortedData.end());
     194             : 
     195           1 :     size_t index = static_cast<size_t>(p * (data.size() - 1));
     196           1 :     return sortedData[index];
     197           1 : }
     198             : 
     199           2 : double gpmp::stats::ProbDist::mle(const std::vector<double> &data) {
     200           2 :     if (data.empty()) {
     201           0 :         return 0.0; // Invalid input, return 0
     202             :     }
     203             : 
     204           2 :     double sum = std::accumulate(data.begin(), data.end(), 0.0);
     205           2 :     return sum / data.size();
     206             : }
     207             : 
     208           1 : double gpmp::stats::ProbDist::mom(const std::vector<double> &data) {
     209           1 :     if (data.empty()) {
     210           0 :         return 0.0; // Invalid input, return 0
     211             :     }
     212             :     gpmp::stats::Describe desc;
     213             : 
     214           1 :     double mean = desc.mean_arith(data);
     215           1 :     double variance = desc.variance(data, mean);
     216             : 
     217           1 :     return mean - variance / 2.0;
     218             : }
     219             : 
     220           1 : double gpmp::stats::ProbDist::mle_est(const std::vector<double> &data) {
     221             :     // This is a placeholder, you can replace it with a specific M-estimation
     222             :     // method
     223           1 :     return mle(data);
     224             : }
     225             : 
     226           1 : double gpmp::stats::ProbDist::mumv(const std::vector<double> &data) {
     227           1 :     if (data.empty()) {
     228           0 :         return 0.0; // Invalid input, return 0
     229             :     }
     230             : 
     231           1 :     double sum = std::accumulate(data.begin(), data.end(), 0.0);
     232           1 :     return sum / data.size();
     233             : }
     234             : 
     235           2 : double gpmp::stats::ProbDist::median_uniased(const std::vector<double> &data) {
     236           2 :     if (data.empty()) {
     237           0 :         return 0.0; // Invalid input, return 0
     238             :     }
     239             : 
     240           2 :     std::vector<double> sortedData = data;
     241           2 :     std::sort(sortedData.begin(), sortedData.end());
     242             : 
     243           2 :     size_t size = sortedData.size();
     244           2 :     if (size % 2 == 0) {
     245           1 :         return (sortedData[size / 2 - 1] + sortedData[size / 2]) / 2.0;
     246             :     } else {
     247           1 :         return sortedData[size / 2];
     248             :     }
     249           2 : }
     250             : 
     251             : // Interval Estimation
     252             : std::pair<double, double>
     253           1 : gpmp::stats::ProbDist::ConfidenceInterval(const std::vector<double> &data,
     254             :                                           double alpha) {
     255           1 :     if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
     256           0 :         return {0.0, 0.0}; // Invalid input, return an empty interval
     257             :     }
     258             : 
     259           1 :     size_t n = data.size();
     260             :     gpmp::stats::Describe desc;
     261           1 :     double mean = desc.mean_arith(data);
     262           1 :     double stddev = desc.stdev(data, mean);
     263             : 
     264             :     // Assuming a normal distribution for simplicity
     265           1 :     double z = quantile_dist(1 - alpha / 2.0);
     266           1 :     double margin = z * stddev / std::sqrt(static_cast<double>(n));
     267             : 
     268           1 :     return {mean - margin, mean + margin};
     269             : }
     270             : 
     271           0 : double gpmp::stats::ProbDist::Pivot(
     272             :     const std::vector<double> &data,
     273             :     double pivotFunction(const std::vector<double> &)) {
     274           0 :     if (data.empty()) {
     275           0 :         return 0.0; // Invalid input, return 0
     276             :     }
     277             : 
     278           0 :     return pivotFunction(data);
     279             : }
     280             : 
     281             : // Example of a pivot function for Confidence Interval
     282           0 : double gpmp::stats::ProbDist::PivotFunctionForConfidenceInterval(
     283             :     const std::vector<double> &data) {
     284           0 :     size_t n = data.size();
     285             : 
     286             :     gpmp::stats::Describe desc;
     287           0 :     double mean = desc.mean_arith(data);
     288           0 :     double stddev = desc.stdev(data, mean);
     289             : 
     290           0 :     return mean + 2 * stddev / std::sqrt(static_cast<double>(n));
     291             : }
     292             : 
     293             : std::pair<double, double>
     294           1 : gpmp::stats::ProbDist::LikelihoodInterval(const std::vector<double> &data,
     295             :                                           double alpha) {
     296             :     // Example implementation, this needs to be adapted based on the specific
     297             :     // likelihood function
     298           1 :     if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
     299           0 :         return {0.0, 0.0}; // Invalid input, return an empty interval
     300             :     }
     301             : 
     302             :     // Placeholder, implement likelihood function and find confidence bounds
     303           1 :     double lowerBound = 0.0;
     304           1 :     double upperBound = 1.0;
     305             : 
     306           1 :     return {lowerBound, upperBound};
     307             : }
     308             : 
     309             : std::pair<double, double>
     310           1 : gpmp::stats::ProbDist::PredictionInterval(const std::vector<double> &data,
     311             :                                           double alpha) {
     312           1 :     if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
     313           0 :         return {0.0, 0.0}; // Invalid input, return an empty interval
     314             :     }
     315             : 
     316             :     // Placeholder, implement prediction interval calculation
     317           1 :     double lowerBound = 0.0;
     318           1 :     double upperBound = 1.0;
     319             : 
     320           1 :     return {lowerBound, upperBound};
     321             : }
     322             : 
     323             : std::pair<double, double>
     324           1 : gpmp::stats::ProbDist::ToleranceInterval(const std::vector<double> &data,
     325             :                                          double alpha) {
     326           1 :     if (data.empty() || alpha <= 0.0 || alpha >= 1.0) {
     327           0 :         return {0.0, 0.0}; // Invalid input, return an empty interval
     328             :     }
     329             : 
     330             :     // Placeholder, implement tolerance interval calculation
     331           1 :     double lowerBound = 0.0;
     332           1 :     double upperBound = 1.0;
     333             : 
     334           1 :     return {lowerBound, upperBound};
     335             : }

Generated by: LCOV version 1.14