LCOV - lcov.info - modules/stats/hypothesis.cpp

LCOV - code coverage report

Current view:	top level - modules/stats - hypothesis.cpp (source / functions)		Hit	Total	Coverage
Test:	lcov.info	Lines:	0	179	0.0 %
Date:	2024-05-13 05:06:06	Functions:	0	14	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*************************************************************************
       2             :  *
       3             :  *  Project
       4             :  *                         _____ _____  __  __ _____
       5             :  *                        / ____|  __ \|  \/  |  __ \
       6             :  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
       7             :  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
       8             :  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
       9             :  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
      10             :  *      | |
      11             :  *      |_|
      12             :  *
      13             :  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
      14             :  *
      15             :  * This software is licensed as described in the file LICENSE, which
      16             :  * you should have received as part of this distribution. The terms
      17             :  * among other details are referenced in the official documentation
      18             :  * seen here : https://akielaries.github.io/openGPMP/ along with
      19             :  * important files seen in this project.
      20             :  *
      21             :  * You may opt to use, copy, modify, merge, publish, distribute
      22             :  * and/or sell copies of the Software, and permit persons to whom
      23             :  * the Software is furnished to do so, under the terms of the
      24             :  * LICENSE file.
      25             :  *
      26             :  *
      27             :  *
      28             :  * This software is distributed on an AS IS basis, WITHOUT
      29             :  * WARRANTY OF ANY KIND, either express or implied.
      30             :  *
      31             :  ************************************************************************/
      32             : #include <algorithm>
      33             : #include <cmath>
      34             : #include <iostream>
      35             : #include <limits>
      36             : #include <openGPMP/stats/describe.hpp>
      37             : #include <openGPMP/stats/hypothesis.hpp>
      38             : #include <vector>
      39             : 
      40             : // Method for conducting a one-sample t-test
      41             : double
      42           0 : gpmp::stats::HypothesisTest::one_sample_ttest(const std::vector<double> &sample,
      43             :                                               double populationMean) {
      44           0 :     int n = sample.size();
      45           0 :     double sampleMean = gpmp::stats::Describe::mean_arith(sample);
      46           0 :     double sampleStdDev = gpmp::stats::Describe::stdev(sample, sampleMean);
      47           0 :     double standardError = sampleStdDev / sqrt(n);
      48           0 :     return (sampleMean - populationMean) / standardError;
      49             : }
      50             : 
      51             : // Method for conducting a two-sample t-test
      52           0 : double gpmp::stats::HypothesisTest::two_sample_ttest(
      53             :     const std::vector<double> &sample1,
      54             :     const std::vector<double> &sample2) {
      55           0 :     int n1 = sample1.size();
      56           0 :     int n2 = sample2.size();
      57           0 :     double sampleMean1 = gpmp::stats::Describe::mean_arith(sample1);
      58           0 :     double sampleMean2 = gpmp::stats::Describe::mean_arith(sample2);
      59           0 :     double sampleVar1 = gpmp::stats::Describe::variance(sample1, sampleMean1);
      60           0 :     double sampleVar2 = gpmp::stats::Describe::variance(sample2, sampleMean2);
      61           0 :     double pooledVar =
      62           0 :         ((n1 - 1) * sampleVar1 + (n2 - 1) * sampleVar2) / (n1 + n2 - 2);
      63             :     double t =
      64           0 :         (sampleMean1 - sampleMean2) / sqrt(pooledVar * (1.0 / n1 + 1.0 / n2));
      65           0 :     return t;
      66             : }
      67             : 
      68             : // Method for conducting ANOVA (Analysis of Variance)
      69           0 : double gpmp::stats::HypothesisTest::ANOVA(
      70             :     const std::vector<std::vector<double>> &samples) {
      71           0 :     int k = samples.size();
      72           0 :     int n = 0;
      73           0 :     double grandMean = 0.0;
      74           0 :     double SSB = 0.0;
      75           0 :     double SSW = 0.0;
      76             : 
      77             :     // Calculate total number of observations and grand mean
      78           0 :     for (const auto &sample : samples) {
      79           0 :         n += sample.size();
      80           0 :         double sampleMean = gpmp::stats::Describe::mean_arith(sample);
      81           0 :         grandMean += sampleMean;
      82             :     }
      83           0 :     grandMean /= k;
      84             : 
      85             :     // Calculate sum of squares between groups (SSB) and within groups (SSW)
      86           0 :     for (int i = 0; i < k; ++i) {
      87           0 :         double sampleMean = gpmp::stats::Describe::mean_arith(samples[i]);
      88           0 :         for (double x : samples[i]) {
      89           0 :             SSB += pow((sampleMean - grandMean), 2);
      90           0 :             SSW += pow((x - sampleMean), 2);
      91             :         }
      92             :     }
      93             : 
      94             :     // Calculate degrees of freedom
      95           0 :     int dfBetweenGroups = k - 1;
      96           0 :     int dfWithinGroups = n - k;
      97             : 
      98             :     // Calculate F-statistic
      99           0 :     double MSB = SSB / dfBetweenGroups;
     100           0 :     double MSW = SSW / dfWithinGroups;
     101           0 :     double F = MSB / MSW;
     102             : 
     103           0 :     return F;
     104             : }
     105             : 
     106             : // Method for conducting chi-square test of independence
     107           0 : double gpmp::stats::HypothesisTest::chi_square_test(
     108             :     const std::vector<std::vector<int>> &observed,
     109             :     const std::vector<std::vector<double>> &expected) {
     110           0 :     int rows = observed.size();
     111           0 :     int cols = observed[0].size();
     112           0 :     double chiSquare = 0.0;
     113             : 
     114           0 :     for (int i = 0; i < rows; ++i) {
     115           0 :         for (int j = 0; j < cols; ++j) {
     116           0 :             chiSquare +=
     117           0 :                 pow((observed[i][j] - expected[i][j]), 2) / expected[i][j];
     118             :         }
     119             :     }
     120             : 
     121           0 :     return chiSquare;
     122             : }
     123             : 
     124             : // Method for conducting z-test for proportions
     125           0 : double gpmp::stats::HypothesisTest::proportion_z_test(double p1,
     126             :                                                       double p2,
     127             :                                                       double n1,
     128             :                                                       double n2) {
     129           0 :     double p = (p1 * n1 + p2 * n2) / (n1 + n2);
     130           0 :     double z = (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2));
     131           0 :     return z;
     132             : }
     133             : 
     134             : // Method for conducting Wilcoxon signed-rank test
     135           0 : double gpmp::stats::HypothesisTest::wilcoxon_rank_test(
     136             :     const std::vector<double> &sample1,
     137             :     const std::vector<double> &sample2) {
     138           0 :     int n = sample1.size();
     139           0 :     if (n != static_cast<int>(sample2.size())) {
     140           0 :         std::cerr << "Sample sizes must be equal for Wilcoxon signed-rank test."
     141           0 :                   << std::endl;
     142           0 :         return std::numeric_limits<double>::quiet_NaN();
     143             :     }
     144             : 
     145           0 :     std::vector<double> differences;
     146           0 :     for (int i = 0; i < n; ++i) {
     147           0 :         differences.push_back(sample1[i] - sample2[i]);
     148             :     }
     149           0 :     std::sort(differences.begin(), differences.end(), [](double a, double b) {
     150           0 :         return std::abs(a) < std::abs(b);
     151             :     });
     152             : 
     153           0 :     double Tplus = 0;
     154           0 :     double Tminus = 0;
     155           0 :     int numPositive = 0;
     156           0 :     int numNegative = 0;
     157           0 :     for (double diff : differences) {
     158           0 :         if (diff > 0) {
     159           0 :             Tplus += diff;
     160           0 :             numPositive++;
     161           0 :         } else if (diff < 0) {
     162           0 :             Tminus -= diff;
     163           0 :             numNegative++;
     164             :         }
     165             :     }
     166           0 :     int T = std::min(Tplus, Tminus);
     167             : 
     168             :     // Calculate the critical value using the normal approximation
     169           0 :     double mean = n * (n + 1) / 4.0;
     170           0 :     double stdDev = sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
     171           0 :     double z = (T - mean) / stdDev;
     172             : 
     173           0 :     return z;
     174           0 : }
     175             : 
     176             : // Method for conducting Mann-Whitney U test
     177           0 : double gpmp::stats::HypothesisTest::mann_whitney_test(
     178             :     const std::vector<double> &sample1,
     179             :     const std::vector<double> &sample2) {
     180           0 :     int n1 = sample1.size();
     181           0 :     int n2 = sample2.size();
     182           0 :     double U1 = gpmp::stats::Describe::u_stat(sample1, sample2);
     183           0 :     double U2 = gpmp::stats::Describe::u_stat(sample2, sample1);
     184           0 :     double U = std::min(U1, U2);
     185             : 
     186             :     // Calculate the expected value of U
     187           0 :     double expectedU = n1 * n2 / 2.0;
     188             : 
     189             :     // Calculate the standard deviation of U
     190           0 :     double stdDev = sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0);
     191             : 
     192             :     // Calculate the z-score
     193           0 :     double z = (U - expectedU) / stdDev;
     194             : 
     195           0 :     return z;
     196             : }
     197             : 
     198             : // Method for conducting Fisher's Exact Test
     199           0 : double gpmp::stats::HypothesisTest::fisher_test(
     200             :     const std::vector<std::vector<int>> &table) {
     201           0 :     int nRows = table.size();
     202           0 :     int nCols = table[0].size();
     203             : 
     204           0 :     if (nRows != 2 || nCols != 2) {
     205           0 :         std::cerr << "Fisher's Exact Test requires a 2x2 contingency table."
     206           0 :                   << std::endl;
     207           0 :         return std::numeric_limits<double>::quiet_NaN();
     208             :     }
     209             : 
     210           0 :     int a = table[0][0];
     211           0 :     int b = table[0][1];
     212           0 :     int c = table[1][0];
     213           0 :     int d = table[1][1];
     214             : 
     215           0 :     double p = (factorial(a + b) * factorial(c + d) * factorial(a + c) *
     216           0 :                 factorial(b + d)) /
     217           0 :                (factorial(a) * factorial(b) * factorial(c) * factorial(d) *
     218           0 :                 factorial(a + b + c + d));
     219             : 
     220           0 :     return p;
     221             : }
     222             : 
     223             : // Method for conducting Kolmogorov-Smirnov Test
     224           0 : double gpmp::stats::HypothesisTest::kol_smirnov_test(
     225             :     const std::vector<double> &sample1,
     226             :     const std::vector<double> &sample2) {
     227           0 :     int n1 = sample1.size();
     228           0 :     int n2 = sample2.size();
     229             : 
     230           0 :     std::vector<double> combinedSamples = sample1;
     231           0 :     combinedSamples.insert(combinedSamples.end(),
     232             :                            sample2.begin(),
     233             :                            sample2.end());
     234           0 :     std::sort(combinedSamples.begin(), combinedSamples.end());
     235             : 
     236           0 :     double maxDPlus = 0.0;
     237           0 :     double maxDMinus = 0.0;
     238             : 
     239           0 :     for (size_t i = 0; i < combinedSamples.size(); ++i) {
     240           0 :         double DPlus = (i + 1) / static_cast<double>(n1) - combinedSamples[i];
     241           0 :         double DMinus = combinedSamples[i] - i / static_cast<double>(n2);
     242             : 
     243           0 :         maxDPlus = std::max(maxDPlus, DPlus);
     244           0 :         maxDMinus = std::max(maxDMinus, DMinus);
     245             :     }
     246             : 
     247           0 :     return std::max(maxDPlus, maxDMinus);
     248           0 : }
     249             : 
     250             : // Method for conducting Wilcoxon Rank Sum Test (Mann-Whitney U Test)
     251           0 : double gpmp::stats::HypothesisTest::wilcoxon_rank_sum_test(
     252             :     const std::vector<double> &sample1,
     253             :     const std::vector<double> &sample2) {
     254           0 :     int n1 = sample1.size();
     255           0 :     int n2 = sample2.size();
     256           0 :     std::vector<double> ranks;
     257           0 :     ranks.reserve(n1 + n2);
     258             : 
     259           0 :     for (double x : sample1) {
     260           0 :         ranks.push_back(x);
     261             :     }
     262           0 :     for (double x : sample2) {
     263           0 :         ranks.push_back(x);
     264             :     }
     265             : 
     266           0 :     std::sort(ranks.begin(), ranks.end());
     267             : 
     268           0 :     double rankSum1 = 0.0;
     269           0 :     for (double x : sample1) {
     270           0 :         rankSum1 +=
     271           0 :             std::distance(ranks.begin(),
     272             :                           std::lower_bound(ranks.begin(), ranks.end(), x));
     273             :     }
     274             : 
     275           0 :     double U1 = rankSum1 - (n1 * (n1 + 1)) / 2.0;
     276           0 :     double U2 = n1 * n2 - U1;
     277             : 
     278           0 :     return std::min(U1, U2);
     279           0 : }
     280             : 
     281             : // Method for conducting Kruskal-Wallis Test
     282           0 : double gpmp::stats::HypothesisTest::kruskal_wallis_test(
     283             :     const std::vector<std::vector<double>> &samples) {
     284           0 :     int k = samples.size();
     285           0 :     std::vector<std::pair<double, int>> combinedData;
     286             : 
     287           0 :     for (int i = 0; i < k; ++i) {
     288           0 :         for (double x : samples[i]) {
     289           0 :             combinedData.push_back(std::make_pair(x, i));
     290             :         }
     291             :     }
     292             : 
     293           0 :     std::sort(combinedData.begin(), combinedData.end());
     294             : 
     295           0 :     std::vector<double> ranks;
     296           0 :     ranks.reserve(combinedData.size());
     297             : 
     298           0 :     int rank = 1;
     299           0 :     ranks.push_back(rank);
     300           0 :     for (size_t i = 1; i < combinedData.size(); ++i) {
     301           0 :         if (std::abs(combinedData[i].first - combinedData[i - 1].first) >
     302           0 :             std::numeric_limits<double>::epsilon()) {
     303           0 :             rank++;
     304             :         }
     305           0 :         ranks.push_back(rank);
     306             :     }
     307             : 
     308           0 :     double H = 0.0;
     309           0 :     for (int i = 0; i < k; ++i) {
     310           0 :         double rankSum = 0.0;
     311           0 :         for (size_t j = 0; j < samples[i].size(); ++j) {
     312           0 :             rankSum += ranks[i * samples[i].size() + j];
     313             :         }
     314           0 :         H += (rankSum * rankSum) / samples[i].size();
     315             :     }
     316           0 :     H = (12.0 / (combinedData.size() * (combinedData.size() + 1))) * H -
     317           0 :         3.0 * (combinedData.size() + 1);
     318             : 
     319           0 :     return H;
     320           0 : }
     321             : 
     322             : // Method for conducting Runs Test
     323             : double
     324           0 : gpmp::stats::HypothesisTest::runs_test(const std::vector<bool> &sequence) {
     325           0 :     int n = sequence.size();
     326           0 :     int numRuns = 1;
     327             : 
     328           0 :     for (int i = 1; i < n; ++i) {
     329           0 :         if (sequence[i] != sequence[i - 1]) {
     330           0 :             numRuns++;
     331             :         }
     332             :     }
     333             : 
     334           0 :     double expectedRuns = (2.0 * n - 1) / 3.0;
     335           0 :     double varianceRuns = (16.0 * n - 29) / 90.0;
     336           0 :     double z = (numRuns - expectedRuns) / sqrt(varianceRuns);
     337             : 
     338           0 :     return z;
     339             : }
     340             : 
     341             : // Helper method to calculate factorial
     342           0 : int gpmp::stats::HypothesisTest::factorial(int n) {
     343           0 :     if (n <= 1) {
     344           0 :         return 1;
     345             :     }
     346           0 :     return n * factorial(n - 1);
     347             : }

Generated by: LCOV version 1.14