43 double populationMean) {
44 int n = sample.size();
47 double standardError = sampleStdDev / sqrt(n);
48 return (sampleMean - populationMean) / standardError;
53 const std::vector<double> &sample1,
54 const std::vector<double> &sample2) {
55 int n1 = sample1.size();
56 int n2 = sample2.size();
62 ((n1 - 1) * sampleVar1 + (n2 - 1) * sampleVar2) / (n1 + n2 - 2);
64 (sampleMean1 - sampleMean2) / sqrt(pooledVar * (1.0 / n1 + 1.0 / n2));
70 const std::vector<std::vector<double>> &samples) {
71 int k = samples.size();
73 double grandMean = 0.0;
78 for (
const auto &sample : samples) {
81 grandMean += sampleMean;
86 for (
int i = 0; i < k; ++i) {
88 for (
double x : samples[i]) {
89 SSB += pow((sampleMean - grandMean), 2);
90 SSW += pow((x - sampleMean), 2);
95 int dfBetweenGroups = k - 1;
96 int dfWithinGroups = n - k;
99 double MSB = SSB / dfBetweenGroups;
100 double MSW = SSW / dfWithinGroups;
101 double F = MSB / MSW;
108 const std::vector<std::vector<int>> &observed,
109 const std::vector<std::vector<double>> &expected) {
110 int rows = observed.size();
111 int cols = observed[0].size();
112 double chiSquare = 0.0;
114 for (
int i = 0; i <
rows; ++i) {
115 for (
int j = 0; j <
cols; ++j) {
117 pow((observed[i][j] - expected[i][j]), 2) / expected[i][j];
129 double p = (p1 * n1 + p2 * n2) / (n1 + n2);
130 double z = (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2));
136 const std::vector<double> &sample1,
137 const std::vector<double> &sample2) {
138 int n = sample1.size();
139 if (n !=
static_cast<int>(sample2.size())) {
140 std::cerr <<
"Sample sizes must be equal for Wilcoxon signed-rank test."
142 return std::numeric_limits<double>::quiet_NaN();
145 std::vector<double> differences;
146 for (
int i = 0; i < n; ++i) {
147 differences.push_back(sample1[i] - sample2[i]);
149 std::sort(differences.begin(), differences.end(), [](
double a,
double b) {
150 return std::abs(a) < std::abs(b);
157 for (
double diff : differences) {
161 }
else if (diff < 0) {
166 int T = std::min(Tplus, Tminus);
169 double mean = n * (n + 1) / 4.0;
170 double stdDev = sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
171 double z = (T - mean) / stdDev;
178 const std::vector<double> &sample1,
179 const std::vector<double> &sample2) {
180 int n1 = sample1.size();
181 int n2 = sample2.size();
184 double U = std::min(U1, U2);
187 double expectedU = n1 * n2 / 2.0;
190 double stdDev = sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0);
193 double z = (U - expectedU) / stdDev;
200 const std::vector<std::vector<int>> &table) {
201 int nRows = table.size();
202 int nCols = table[0].size();
204 if (nRows != 2 || nCols != 2) {
205 std::cerr <<
"Fisher's Exact Test requires a 2x2 contingency table."
207 return std::numeric_limits<double>::quiet_NaN();
215 double p = (factorial(a + b) * factorial(c + d) * factorial(a + c) *
217 (factorial(a) * factorial(b) * factorial(c) * factorial(d) *
218 factorial(a + b + c + d));
225 const std::vector<double> &sample1,
226 const std::vector<double> &sample2) {
227 int n1 = sample1.size();
228 int n2 = sample2.size();
230 std::vector<double> combinedSamples = sample1;
231 combinedSamples.insert(combinedSamples.end(),
234 std::sort(combinedSamples.begin(), combinedSamples.end());
236 double maxDPlus = 0.0;
237 double maxDMinus = 0.0;
239 for (
size_t i = 0; i < combinedSamples.size(); ++i) {
240 double DPlus = (i + 1) /
static_cast<double>(n1) - combinedSamples[i];
241 double DMinus = combinedSamples[i] - i /
static_cast<double>(n2);
243 maxDPlus = std::max(maxDPlus, DPlus);
244 maxDMinus = std::max(maxDMinus, DMinus);
247 return std::max(maxDPlus, maxDMinus);
252 const std::vector<double> &sample1,
253 const std::vector<double> &sample2) {
254 int n1 = sample1.size();
255 int n2 = sample2.size();
256 std::vector<double> ranks;
257 ranks.reserve(n1 + n2);
259 for (
double x : sample1) {
262 for (
double x : sample2) {
266 std::sort(ranks.begin(), ranks.end());
268 double rankSum1 = 0.0;
269 for (
double x : sample1) {
271 std::distance(ranks.begin(),
272 std::lower_bound(ranks.begin(), ranks.end(), x));
275 double U1 = rankSum1 - (n1 * (n1 + 1)) / 2.0;
276 double U2 = n1 * n2 - U1;
278 return std::min(U1, U2);
283 const std::vector<std::vector<double>> &samples) {
284 int k = samples.size();
285 std::vector<std::pair<double, int>> combinedData;
287 for (
int i = 0; i < k; ++i) {
288 for (
double x : samples[i]) {
289 combinedData.push_back(std::make_pair(x, i));
293 std::sort(combinedData.begin(), combinedData.end());
295 std::vector<double> ranks;
296 ranks.reserve(combinedData.size());
299 ranks.push_back(rank);
300 for (
size_t i = 1; i < combinedData.size(); ++i) {
301 if (std::abs(combinedData[i].first - combinedData[i - 1].first) >
302 std::numeric_limits<double>::epsilon()) {
305 ranks.push_back(rank);
309 for (
int i = 0; i < k; ++i) {
310 double rankSum = 0.0;
311 for (
size_t j = 0; j < samples[i].size(); ++j) {
312 rankSum += ranks[i * samples[i].size() + j];
314 H += (rankSum * rankSum) / samples[i].size();
316 H = (12.0 / (combinedData.size() * (combinedData.size() + 1))) * H -
317 3.0 * (combinedData.size() + 1);
325 int n = sequence.size();
328 for (
int i = 1; i < n; ++i) {
329 if (sequence[i] != sequence[i - 1]) {
334 double expectedRuns = (2.0 * n - 1) / 3.0;
335 double varianceRuns = (16.0 * n - 29) / 90.0;
336 double z = (numRuns - expectedRuns) / sqrt(varianceRuns);
346 return n * factorial(n - 1);
static double u_stat(const std::vector< double > &sample1, const std::vector< double > &sample2)
Calculates the U statistic given two samples.
static double stdev(const std::vector< double > &data, double mean)
Calculates the standard deviation of a given dataset, given the mean.
static double variance(const std::vector< double > &data, double mean)
Calculates the variance of a given dataset, given the mean.
static double mean_arith(const std::vector< double > &data)
Calculates the arithmetic mean of a given dataset.
static int factorial(int n)
Helper method to calculate factorial.
static double fisher_test(const std::vector< std::vector< int >> &table)
Fisher's Exact Test.
static double mann_whitney_test(const std::vector< double > &sample1, const std::vector< double > &sample2)
Mann-Whitney U test.
static double two_sample_ttest(const std::vector< double > &sample1, const std::vector< double > &sample2)
Two-sample t-test.
static double chi_square_test(const std::vector< std::vector< int >> &observed, const std::vector< std::vector< double >> &expected)
Chi-square test of independence.
static double runs_test(const std::vector< bool > &sequence)
Runs Test.
static double kol_smirnov_test(const std::vector< double > &sample1, const std::vector< double > &sample2)
Kolmogorov-Smirnov Test.
static double ANOVA(const std::vector< std::vector< double >> &samples)
Analysis of Variance (ANOVA).
static double one_sample_ttest(const std::vector< double > &sample, double populationMean)
One-sample t-test.
static double proportion_z_test(double p1, double p2, double n1, double n2)
Z-test for proportions.
static double kruskal_wallis_test(const std::vector< std::vector< double >> &samples)
Kruskal-Wallis Test.
static double wilcoxon_rank_test(const std::vector< double > &sample1, const std::vector< double > &sample2)
Wilcoxon signed-rank test.
static double wilcoxon_rank_sum_test(const std::vector< double > &sample1, const std::vector< double > &sample2)
Wilcoxon Rank Sum Test (Mann-Whitney U Test).