A class for conducting various statistical hypothesis tests. More...

#include <hypothesis.hpp>

Public Member Functions
	HypothesisTest ()
	Constructor. More...

	~HypothesisTest ()
	Destructor. More...

Static Public Member Functions
static double	one_sample_ttest (const std::vector< double > &sample, double populationMean)
	One-sample t-test. More...

static double	two_sample_ttest (const std::vector< double > &sample1, const std::vector< double > &sample2)
	Two-sample t-test. More...

static double	ANOVA (const std::vector< std::vector< double >> &samples)
	Analysis of Variance (ANOVA) More...

static double	chi_square_test (const std::vector< std::vector< int >> &observed, const std::vector< std::vector< double >> &expected)
	Chi-square test of independence. More...

static double	proportion_z_test (double p1, double p2, double n1, double n2)
	Z-test for proportions. More...

static double	wilcoxon_rank_test (const std::vector< double > &sample1, const std::vector< double > &sample2)
	Wilcoxon signed-rank test. More...

static double	mann_whitney_test (const std::vector< double > &sample1, const std::vector< double > &sample2)
	Mann-Whitney U test. More...

static double	fisher_test (const std::vector< std::vector< int >> &table)
	Fisher's Exact Test. More...

static double	kol_smirnov_test (const std::vector< double > &sample1, const std::vector< double > &sample2)
	Kolmogorov-Smirnov Test. More...

static double	wilcoxon_rank_sum_test (const std::vector< double > &sample1, const std::vector< double > &sample2)
	Wilcoxon Rank Sum Test (Mann-Whitney U Test) More...

static double	kruskal_wallis_test (const std::vector< std::vector< double >> &samples)
	Kruskal-Wallis Test. More...

static double	runs_test (const std::vector< bool > &sequence)
	Runs Test. More...

Static Private Member Functions
static int	factorial (int n)
	Helper method to calculate factorial. More...

Detailed Description

A class for conducting various statistical hypothesis tests.

Definition at line 47 of file hypothesis.hpp.

Constructor & Destructor Documentation

◆ HypothesisTest()

gpmp::stats::HypothesisTest::HypothesisTest ( )

inline

Constructor.

Definition at line 52 of file hypothesis.hpp.

52 {

53 }

◆ ~HypothesisTest()

gpmp::stats::HypothesisTest::~HypothesisTest ( )

inline

Destructor.

Definition at line 58 of file hypothesis.hpp.

58 {

59 }

Member Function Documentation

◆ ANOVA()

double gpmp::stats::HypothesisTest::ANOVA ( const std::vector< std::vector< double >> & samples )

static

Analysis of Variance (ANOVA)

Parameters

samples The vector of sample data

Returns: The F-statistic

Definition at line 69 of file hypothesis.cpp.

                                                  {
     int k = samples.size();
     int n = 0;
     double grandMean = 0.0;
     double SSB = 0.0;
     double SSW = 0.0;
  
     // Calculate total number of observations and grand mean
     for (const auto &sample : samples) {
         n += sample.size();
         double sampleMean = gpmp::stats::Describe::mean_arith(sample);
         grandMean += sampleMean;
     }
     grandMean /= k;
  
     // Calculate sum of squares between groups (SSB) and within groups (SSW)
     for (int i = 0; i < k; ++i) {
         double sampleMean = gpmp::stats::Describe::mean_arith(samples[i]);
         for (double x : samples[i]) {
             SSB += pow((sampleMean - grandMean), 2);
             SSW += pow((x - sampleMean), 2);
         }
     }
  
     // Calculate degrees of freedom
     int dfBetweenGroups = k - 1;
     int dfWithinGroups = n - k;
  
     // Calculate F-statistic
     double MSB = SSB / dfBetweenGroups;
     double MSW = SSW / dfWithinGroups;
     double F = MSB / MSW;
  
     return F;
 }

References gpmp::stats::Describe::mean_arith().

Referenced by main().

◆ chi_square_test()

double gpmp::stats::HypothesisTest::chi_square_test	(	const std::vector< std::vector< int >> &	observed,
		const std::vector< std::vector< double >> &	expected
	)

static

Chi-square test of independence.

Parameters

observed	The observed data
expected	The expected data

Returns: The chi-square statistic

Definition at line 107 of file hypothesis.cpp.

                                                   {
     int rows = observed.size();
     int cols = observed[0].size();
     double chiSquare = 0.0;
  
     for (int i = 0; i < rows; ++i) {
         for (int j = 0; j < cols; ++j) {
             chiSquare +=
                 pow((observed[i][j] - expected[i][j]), 2) / expected[i][j];
         }
     }
  
     return chiSquare;
 }

References test_linalg::cols, and test_linalg::rows.

Referenced by main().

◆ factorial()

int gpmp::stats::HypothesisTest::factorial ( int n )

static private

Helper method to calculate factorial.

Parameters

n	The integer value

Returns: The factorial of n

Definition at line 342 of file hypothesis.cpp.

                                             {
     if (n <= 1) {
         return 1;
     }
     return n * factorial(n - 1);
 }

◆ fisher_test()

double gpmp::stats::HypothesisTest::fisher_test ( const std::vector< std::vector< int >> & table )

static

Fisher's Exact Test.

Parameters

table The contingency table

Returns: The probability of the observed 2x2 table under the hypergeometric distribution (NaN if the table is not 2x2)

Definition at line 199 of file hypothesis.cpp.

                                             {
     int nRows = table.size();
     int nCols = table[0].size();
  
     if (nRows != 2 || nCols != 2) {
         std::cerr << "Fisher's Exact Test requires a 2x2 contingency table."
                   << std::endl;
         return std::numeric_limits<double>::quiet_NaN();
     }
  
     int a = table[0][0];
     int b = table[0][1];
     int c = table[1][0];
     int d = table[1][1];
  
     double p = (factorial(a + b) * factorial(c + d) * factorial(a + c) *
                 factorial(b + d)) /
                (factorial(a) * factorial(b) * factorial(c) * factorial(d) *
                 factorial(a + b + c + d));
  
     return p;
 }

◆ kol_smirnov_test()

double gpmp::stats::HypothesisTest::kol_smirnov_test	(	const std::vector< double > &	sample1,
		const std::vector< double > &	sample2
	)

static

Kolmogorov-Smirnov Test.

Parameters

sample1	The first sample data
sample2	The second sample data

Returns: The test statistic

Definition at line 224 of file hypothesis.cpp.

                                       {
     int n1 = sample1.size();
     int n2 = sample2.size();
  
     std::vector<double> combinedSamples = sample1;
     combinedSamples.insert(combinedSamples.end(),
                            sample2.begin(),
                            sample2.end());
     std::sort(combinedSamples.begin(), combinedSamples.end());
  
     double maxDPlus = 0.0;
     double maxDMinus = 0.0;
  
     for (size_t i = 0; i < combinedSamples.size(); ++i) {
         double DPlus = (i + 1) / static_cast<double>(n1) - combinedSamples[i];
         double DMinus = combinedSamples[i] - i / static_cast<double>(n2);
  
         maxDPlus = std::max(maxDPlus, DPlus);
         maxDMinus = std::max(maxDMinus, DMinus);
     }
  
     return std::max(maxDPlus, maxDMinus);
 }

◆ kruskal_wallis_test()

double gpmp::stats::HypothesisTest::kruskal_wallis_test ( const std::vector< std::vector< double >> & samples )

static

Kruskal-Wallis Test.

Parameters

samples The vector of sample data

Returns: The test statistic

Definition at line 282 of file hypothesis.cpp.

                                                  {
     int k = samples.size();
     std::vector<std::pair<double, int>> combinedData;
  
     for (int i = 0; i < k; ++i) {
         for (double x : samples[i]) {
             combinedData.push_back(std::make_pair(x, i));
         }
     }
  
     std::sort(combinedData.begin(), combinedData.end());
  
     std::vector<double> ranks;
     ranks.reserve(combinedData.size());
  
     int rank = 1;
     ranks.push_back(rank);
     for (size_t i = 1; i < combinedData.size(); ++i) {
         if (std::abs(combinedData[i].first - combinedData[i - 1].first) >
             std::numeric_limits<double>::epsilon()) {
             rank++;
         }
         ranks.push_back(rank);
     }
  
     double H = 0.0;
     for (int i = 0; i < k; ++i) {
         double rankSum = 0.0;
         for (size_t j = 0; j < samples[i].size(); ++j) {
             rankSum += ranks[i * samples[i].size() + j];
         }
         H += (rankSum * rankSum) / samples[i].size();
     }
     H = (12.0 / (combinedData.size() * (combinedData.size() + 1))) * H -
         3.0 * (combinedData.size() + 1);
  
     return H;
 }

◆ mann_whitney_test()

double gpmp::stats::HypothesisTest::mann_whitney_test	(	const std::vector< double > &	sample1,
		const std::vector< double > &	sample2
	)

static

Mann-Whitney U test.

Parameters

sample1	The first sample data
sample2	The second sample data

Returns: The z-score

Definition at line 177 of file hypothesis.cpp.

                                       {
     int n1 = sample1.size();
     int n2 = sample2.size();
     double U1 = gpmp::stats::Describe::u_stat(sample1, sample2);
     double U2 = gpmp::stats::Describe::u_stat(sample2, sample1);
     double U = std::min(U1, U2);
  
     // Calculate the expected value of U
     double expectedU = n1 * n2 / 2.0;
  
     // Calculate the standard deviation of U
     double stdDev = sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0);
  
     // Calculate the z-score
     double z = (U - expectedU) / stdDev;
  
     return z;
 }

References gpmp::stats::Describe::u_stat().

Referenced by main().

◆ one_sample_ttest()

double gpmp::stats::HypothesisTest::one_sample_ttest	(	const std::vector< double > &	sample,
		double	populationMean
	)

static

One-sample t-test.

Parameters

sample	The sample data
populationMean	The population mean

Returns: The t-statistic

Definition at line 42 of file hypothesis.cpp.

                                                                      {
     int n = sample.size();
     double sampleMean = gpmp::stats::Describe::mean_arith(sample);
     double sampleStdDev = gpmp::stats::Describe::stdev(sample, sampleMean);
     double standardError = sampleStdDev / sqrt(n);
     return (sampleMean - populationMean) / standardError;
 }

References gpmp::stats::Describe::mean_arith(), and gpmp::stats::Describe::stdev().

Referenced by main().

◆ proportion_z_test()

double gpmp::stats::HypothesisTest::proportion_z_test	(	double	p1,
		double	p2,
		double	n1,
		double	n2
	)

static

Z-test for proportions.

Parameters

p1	The proportion for sample 1
p2	The proportion for sample 2
n1	The sample size for sample 1
n2	The sample size for sample 2

Returns: The z-score

Definition at line 125 of file hypothesis.cpp.

                                                                  {
     double p = (p1 * n1 + p2 * n2) / (n1 + n2);
     double z = (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2));
     return z;
 }

Referenced by main().

◆ runs_test()

double gpmp::stats::HypothesisTest::runs_test ( const std::vector< bool > & sequence )

static

Runs Test.

Parameters

sequence The binary sequence

Returns: The z-score

Definition at line 324 of file hypothesis.cpp.

                                                                   {
     int n = sequence.size();
     int numRuns = 1;
  
     for (int i = 1; i < n; ++i) {
         if (sequence[i] != sequence[i - 1]) {
             numRuns++;
         }
     }
  
     double expectedRuns = (2.0 * n - 1) / 3.0;
     double varianceRuns = (16.0 * n - 29) / 90.0;
     double z = (numRuns - expectedRuns) / sqrt(varianceRuns);
  
     return z;
 }

◆ two_sample_ttest()

double gpmp::stats::HypothesisTest::two_sample_ttest	(	const std::vector< double > &	sample1,
		const std::vector< double > &	sample2
	)

static

Two-sample t-test.

Parameters

sample1	The first sample data
sample2	The second sample data

Returns: The t-statistic

Definition at line 52 of file hypothesis.cpp.

                                       {
     int n1 = sample1.size();
     int n2 = sample2.size();
     double sampleMean1 = gpmp::stats::Describe::mean_arith(sample1);
     double sampleMean2 = gpmp::stats::Describe::mean_arith(sample2);
     double sampleVar1 = gpmp::stats::Describe::variance(sample1, sampleMean1);
     double sampleVar2 = gpmp::stats::Describe::variance(sample2, sampleMean2);
     double pooledVar =
         ((n1 - 1) * sampleVar1 + (n2 - 1) * sampleVar2) / (n1 + n2 - 2);
     double t =
         (sampleMean1 - sampleMean2) / sqrt(pooledVar * (1.0 / n1 + 1.0 / n2));
     return t;
 }

References gpmp::stats::Describe::mean_arith(), and gpmp::stats::Describe::variance().

Referenced by main().

◆ wilcoxon_rank_sum_test()

double gpmp::stats::HypothesisTest::wilcoxon_rank_sum_test	(	const std::vector< double > &	sample1,
		const std::vector< double > &	sample2
	)

static

Wilcoxon Rank Sum Test (Mann-Whitney U Test)

Parameters

sample1	The first sample data
sample2	The second sample data

Returns: The U statistic

Definition at line 251 of file hypothesis.cpp.

                                       {
     int n1 = sample1.size();
     int n2 = sample2.size();
     std::vector<double> ranks;
     ranks.reserve(n1 + n2);
  
     for (double x : sample1) {
         ranks.push_back(x);
     }
     for (double x : sample2) {
         ranks.push_back(x);
     }
  
     std::sort(ranks.begin(), ranks.end());
  
     double rankSum1 = 0.0;
     for (double x : sample1) {
         rankSum1 +=
             std::distance(ranks.begin(),
                           std::lower_bound(ranks.begin(), ranks.end(), x));
     }
  
     double U1 = rankSum1 - (n1 * (n1 + 1)) / 2.0;
     double U2 = n1 * n2 - U1;
  
     return std::min(U1, U2);
 }

◆ wilcoxon_rank_test()

double gpmp::stats::HypothesisTest::wilcoxon_rank_test	(	const std::vector< double > &	sample1,
		const std::vector< double > &	sample2
	)

static

Wilcoxon signed-rank test.

Parameters

sample1	The first sample data
sample2	The second sample data

Returns: The z-score

Definition at line 135 of file hypothesis.cpp.

                                       {
     int n = sample1.size();
     if (n != static_cast<int>(sample2.size())) {
         std::cerr << "Sample sizes must be equal for Wilcoxon signed-rank test."
                   << std::endl;
         return std::numeric_limits<double>::quiet_NaN();
     }
  
     std::vector<double> differences;
     for (int i = 0; i < n; ++i) {
         differences.push_back(sample1[i] - sample2[i]);
     }
     std::sort(differences.begin(), differences.end(), [](double a, double b) {
         return std::abs(a) < std::abs(b);
     });
  
     double Tplus = 0;
     double Tminus = 0;
     int numPositive = 0;
     int numNegative = 0;
     for (double diff : differences) {
         if (diff > 0) {
             Tplus += diff;
             numPositive++;
         } else if (diff < 0) {
             Tminus -= diff;
             numNegative++;
         }
     }
     int T = std::min(Tplus, Tminus);
  
     // Calculate the critical value using the normal approximation
     double mean = n * (n + 1) / 4.0;
     double stdDev = sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
     double z = (T - mean) / stdDev;
  
     return z;
 }

Referenced by main().

The documentation for this class was generated from the following files:

include/openGPMP/stats/hypothesis.hpp
modules/stats/hypothesis.cpp

Public Member Functions

Static Public Member Functions

Static Private Member Functions

Detailed Description

Constructor & Destructor Documentation

◆ HypothesisTest()

◆ ~HypothesisTest()

Member Function Documentation

◆ ANOVA()

◆ chi_square_test()

◆ factorial()

◆ fisher_test()

◆ kol_smirnov_test()

◆ kruskal_wallis_test()

◆ mann_whitney_test()

◆ one_sample_ttest()

◆ proportion_z_test()

◆ runs_test()

◆ two_sample_ttest()

◆ wilcoxon_rank_sum_test()

◆ wilcoxon_rank_test()