A class for conducting various statistical hypothesis tests.
More...
#include <hypothesis.hpp>
|
static double | one_sample_ttest (const std::vector< double > &sample, double populationMean) |
| One-sample t-test. More...
|
|
static double | two_sample_ttest (const std::vector< double > &sample1, const std::vector< double > &sample2) |
| Two-sample t-test. More...
|
|
static double | ANOVA (const std::vector< std::vector< double >> &samples) |
| Analysis of Variance (ANOVA) More...
|
|
static double | chi_square_test (const std::vector< std::vector< int >> &observed, const std::vector< std::vector< double >> &expected) |
| Chi-square test of independence. More...
|
|
static double | proportion_z_test (double p1, double p2, double n1, double n2) |
| Z-test for proportions. More...
|
|
static double | wilcoxon_rank_test (const std::vector< double > &sample1, const std::vector< double > &sample2) |
| Wilcoxon signed-rank test. More...
|
|
static double | mann_whitney_test (const std::vector< double > &sample1, const std::vector< double > &sample2) |
| Mann-Whitney U test. More...
|
|
static double | fisher_test (const std::vector< std::vector< int >> &table) |
| Fisher's Exact Test. More...
|
|
static double | kol_smirnov_test (const std::vector< double > &sample1, const std::vector< double > &sample2) |
| Kolmogorov-Smirnov Test. More...
|
|
static double | wilcoxon_rank_sum_test (const std::vector< double > &sample1, const std::vector< double > &sample2) |
| Wilcoxon Rank Sum Test (Mann-Whitney U Test) More...
|
|
static double | kruskal_wallis_test (const std::vector< std::vector< double >> &samples) |
| Kruskal-Wallis Test. More...
|
|
static double | runs_test (const std::vector< bool > &sequence) |
| Runs Test. More...
|
|
A class for conducting various statistical hypothesis tests.
Definition at line 47 of file hypothesis.hpp.
◆ HypothesisTest()
gpmp::stats::HypothesisTest::HypothesisTest |
( |
| ) |
|
|
inline |
◆ ~HypothesisTest()
gpmp::stats::HypothesisTest::~HypothesisTest |
( |
| ) |
|
|
inline |
◆ ANOVA()
double gpmp::stats::HypothesisTest::ANOVA |
( |
const std::vector< std::vector< double >> & |
samples | ) |
|
|
static |
Analysis of Variance (ANOVA)
- Parameters
-
samples | The vector of sample data |
- Returns
- The F-statistic
Definition at line 69 of file hypothesis.cpp.
71 int k = samples.size();
73 double grandMean = 0.0;
78 for (
const auto &sample : samples) {
81 grandMean += sampleMean;
86 for (
int i = 0; i < k; ++i) {
88 for (
double x : samples[i]) {
89 SSB += pow((sampleMean - grandMean), 2);
90 SSW += pow((x - sampleMean), 2);
95 int dfBetweenGroups = k - 1;
96 int dfWithinGroups = n - k;
99 double MSB = SSB / dfBetweenGroups;
100 double MSW = SSW / dfWithinGroups;
101 double F = MSB / MSW;
static double mean_arith(const std::vector< double > &data)
Calculates the arithmetic mean of a given dataset.
References gpmp::stats::Describe::mean_arith().
Referenced by main().
◆ chi_square_test()
double gpmp::stats::HypothesisTest::chi_square_test |
( |
const std::vector< std::vector< int >> & |
observed, |
|
|
const std::vector< std::vector< double >> & |
expected |
|
) |
| |
|
static |
Chi-square test of independence.
- Parameters
-
observed | The observed data |
expected | The expected data |
- Returns
- The chi-square statistic
Definition at line 107 of file hypothesis.cpp.
110 int rows = observed.size();
111 int cols = observed[0].size();
112 double chiSquare = 0.0;
114 for (
int i = 0; i <
rows; ++i) {
115 for (
int j = 0; j <
cols; ++j) {
117 pow((observed[i][j] - expected[i][j]), 2) / expected[i][j];
References test_linalg::cols, and test_linalg::rows.
Referenced by main().
◆ factorial()
int gpmp::stats::HypothesisTest::factorial |
( |
int |
n | ) |
|
|
static private |
Helper method to calculate factorial.
- Parameters
-
n | The integer whose factorial is calculated |
- Returns
- The factorial of n
Definition at line 342 of file hypothesis.cpp.
static int factorial(int n)
Helper method to calculate factorial.
◆ fisher_test()
double gpmp::stats::HypothesisTest::fisher_test |
( |
const std::vector< std::vector< int >> & |
table | ) |
|
|
static |
Fisher's Exact Test.
- Parameters
-
table | The contingency table |
- Returns
- The p-value
Definition at line 199 of file hypothesis.cpp.
201 int nRows = table.size();
202 int nCols = table[0].size();
204 if (nRows != 2 || nCols != 2) {
205 std::cerr <<
"Fisher's Exact Test requires a 2x2 contingency table."
207 return std::numeric_limits<double>::quiet_NaN();
◆ kol_smirnov_test()
double gpmp::stats::HypothesisTest::kol_smirnov_test |
( |
const std::vector< double > & |
sample1, |
|
|
const std::vector< double > & |
sample2 |
|
) |
| |
|
static |
Kolmogorov-Smirnov Test.
- Parameters
-
sample1 | The first sample data |
sample2 | The second sample data |
- Returns
- The test statistic
Definition at line 224 of file hypothesis.cpp.
227 int n1 = sample1.size();
228 int n2 = sample2.size();
230 std::vector<double> combinedSamples = sample1;
231 combinedSamples.insert(combinedSamples.end(),
234 std::sort(combinedSamples.begin(), combinedSamples.end());
236 double maxDPlus = 0.0;
237 double maxDMinus = 0.0;
239 for (
size_t i = 0; i < combinedSamples.size(); ++i) {
240 double DPlus = (i + 1) /
static_cast<double>(n1) - combinedSamples[i];
241 double DMinus = combinedSamples[i] - i /
static_cast<double>(n2);
243 maxDPlus = std::max(maxDPlus, DPlus);
244 maxDMinus = std::max(maxDMinus, DMinus);
247 return std::max(maxDPlus, maxDMinus);
◆ kruskal_wallis_test()
double gpmp::stats::HypothesisTest::kruskal_wallis_test |
( |
const std::vector< std::vector< double >> & |
samples | ) |
|
|
static |
Kruskal-Wallis Test.
- Parameters
-
samples | The vector of sample data |
- Returns
- The test statistic
Definition at line 282 of file hypothesis.cpp.
284 int k = samples.size();
285 std::vector<std::pair<double, int>> combinedData;
287 for (
int i = 0; i < k; ++i) {
288 for (
double x : samples[i]) {
289 combinedData.push_back(std::make_pair(x, i));
293 std::sort(combinedData.begin(), combinedData.end());
295 std::vector<double> ranks;
296 ranks.reserve(combinedData.size());
299 ranks.push_back(rank);
300 for (
size_t i = 1; i < combinedData.size(); ++i) {
301 if (std::abs(combinedData[i].first - combinedData[i - 1].first) >
302 std::numeric_limits<double>::epsilon()) {
305 ranks.push_back(rank);
309 for (
int i = 0; i < k; ++i) {
310 double rankSum = 0.0;
311 for (
size_t j = 0; j < samples[i].size(); ++j) {
312 rankSum += ranks[i * samples[i].size() + j];
314 H += (rankSum * rankSum) / samples[i].size();
316 H = (12.0 / (combinedData.size() * (combinedData.size() + 1))) * H -
317 3.0 * (combinedData.size() + 1);
◆ mann_whitney_test()
double gpmp::stats::HypothesisTest::mann_whitney_test |
( |
const std::vector< double > & |
sample1, |
|
|
const std::vector< double > & |
sample2 |
|
) |
| |
|
static |
Mann-Whitney U test.
- Parameters
-
sample1 | The first sample data |
sample2 | The second sample data |
- Returns
- The z-score
Definition at line 177 of file hypothesis.cpp.
180 int n1 = sample1.size();
181 int n2 = sample2.size();
184 double U = std::min(U1, U2);
187 double expectedU = n1 * n2 / 2.0;
190 double stdDev = sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0);
193 double z = (U - expectedU) / stdDev;
static double u_stat(const std::vector< double > &sample1, const std::vector< double > &sample2)
Calculates U statistic given two samples.
References gpmp::stats::Describe::u_stat().
Referenced by main().
◆ one_sample_ttest()
double gpmp::stats::HypothesisTest::one_sample_ttest |
( |
const std::vector< double > & |
sample, |
|
|
double |
populationMean |
|
) |
| |
|
static |
One-sample t-test.
- Parameters
-
sample | The sample data |
populationMean | The population mean |
- Returns
- The t-statistic
Definition at line 42 of file hypothesis.cpp.
44 int n = sample.size();
47 double standardError = sampleStdDev / sqrt(n);
48 return (sampleMean - populationMean) / standardError;
static double stdev(const std::vector< double > &data, double mean)
Calculates the standard deviation of a given dataset, given the mean.
References gpmp::stats::Describe::mean_arith(), and gpmp::stats::Describe::stdev().
Referenced by main().
◆ proportion_z_test()
double gpmp::stats::HypothesisTest::proportion_z_test |
( |
double |
p1, |
|
|
double |
p2, |
|
|
double |
n1, |
|
|
double |
n2 |
|
) |
| |
|
static |
Z-test for proportions.
- Parameters
-
p1 | The proportion for sample 1 |
p2 | The proportion for sample 2 |
n1 | The sample size for sample 1 |
n2 | The sample size for sample 2 |
- Returns
- The z-score
Definition at line 125 of file hypothesis.cpp.
129 double p = (p1 * n1 + p2 * n2) / (n1 + n2);
130 double z = (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2));
Referenced by main().
◆ runs_test()
double gpmp::stats::HypothesisTest::runs_test |
( |
const std::vector< bool > & |
sequence | ) |
|
|
static |
Runs Test.
- Parameters
-
sequence | The binary sequence |
- Returns
- The z-score
Definition at line 324 of file hypothesis.cpp.
325 int n = sequence.size();
328 for (
int i = 1; i < n; ++i) {
329 if (sequence[i] != sequence[i - 1]) {
334 double expectedRuns = (2.0 * n - 1) / 3.0;
335 double varianceRuns = (16.0 * n - 29) / 90.0;
336 double z = (numRuns - expectedRuns) / sqrt(varianceRuns);
◆ two_sample_ttest()
double gpmp::stats::HypothesisTest::two_sample_ttest |
( |
const std::vector< double > & |
sample1, |
|
|
const std::vector< double > & |
sample2 |
|
) |
| |
|
static |
Two-sample t-test.
- Parameters
-
sample1 | The first sample data |
sample2 | The second sample data |
- Returns
- The t-statistic
Definition at line 52 of file hypothesis.cpp.
55 int n1 = sample1.size();
56 int n2 = sample2.size();
62 ((n1 - 1) * sampleVar1 + (n2 - 1) * sampleVar2) / (n1 + n2 - 2);
64 (sampleMean1 - sampleMean2) / sqrt(pooledVar * (1.0 / n1 + 1.0 / n2));
static double variance(const std::vector< double > &data, double mean)
Calculates the variance of a given dataset, given the mean.
References gpmp::stats::Describe::mean_arith(), and gpmp::stats::Describe::variance().
Referenced by main().
◆ wilcoxon_rank_sum_test()
double gpmp::stats::HypothesisTest::wilcoxon_rank_sum_test |
( |
const std::vector< double > & |
sample1, |
|
|
const std::vector< double > & |
sample2 |
|
) |
| |
|
static |
Wilcoxon Rank Sum Test (Mann-Whitney U Test)
- Parameters
-
sample1 | The first sample data |
sample2 | The second sample data |
- Returns
- The U statistic
Definition at line 251 of file hypothesis.cpp.
254 int n1 = sample1.size();
255 int n2 = sample2.size();
256 std::vector<double> ranks;
257 ranks.reserve(n1 + n2);
259 for (
double x : sample1) {
262 for (
double x : sample2) {
266 std::sort(ranks.begin(), ranks.end());
268 double rankSum1 = 0.0;
269 for (
double x : sample1) {
271 std::distance(ranks.begin(),
272 std::lower_bound(ranks.begin(), ranks.end(), x));
275 double U1 = rankSum1 - (n1 * (n1 + 1)) / 2.0;
276 double U2 = n1 * n2 - U1;
278 return std::min(U1, U2);
◆ wilcoxon_rank_test()
double gpmp::stats::HypothesisTest::wilcoxon_rank_test |
( |
const std::vector< double > & |
sample1, |
|
|
const std::vector< double > & |
sample2 |
|
) |
| |
|
static |
Wilcoxon signed-rank test.
- Parameters
-
sample1 | The first sample data |
sample2 | The second sample data |
- Returns
- The z-score
Definition at line 135 of file hypothesis.cpp.
138 int n = sample1.size();
139 if (n !=
static_cast<int>(sample2.size())) {
140 std::cerr <<
"Sample sizes must be equal for Wilcoxon signed-rank test."
142 return std::numeric_limits<double>::quiet_NaN();
145 std::vector<double> differences;
146 for (
int i = 0; i < n; ++i) {
147 differences.push_back(sample1[i] - sample2[i]);
149 std::sort(differences.begin(), differences.end(), [](
double a,
double b) {
150 return std::abs(a) < std::abs(b);
157 for (
double diff : differences) {
161 }
else if (diff < 0) {
166 int T = std::min(Tplus, Tminus);
169 double mean = n * (n + 1) / 4.0;
170 double stdDev = sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
171 double z = (T - mean) / stdDev;
Referenced by main().
The documentation for this class was generated from the following files: