openGPMP
Open Source Mathematics Package
Static Public Member Functions | List of all members
gpmp::stats::Resampling Class Reference

A class providing various resampling methods for statistical analysis. More...

#include <resampling.hpp>

Static Public Member Functions

static std::vector< int > bootstrap (const std::vector< int > &data, int numSamples)
 Perform bootstrap resampling. More...
 
static std::vector< int > subsample (const std::vector< int > &data, int numSamples)
 Perform subsampling. More...
 
static std::vector< std::vector< int > > jackknife (const std::vector< int > &data)
 Perform jackknife resampling. More...
 
static std::vector< std::vector< int > > permutation_test (const std::vector< int > &data, int numPermutations)
 Perform permutation test. More...
 
static std::vector< double > bootstrap_t (const std::vector< double > &data, int numSamples)
 Perform bootstrap t-statistic resampling. More...
 
static std::pair< double, double > bootstrap_ci (const std::vector< double > &data, double alpha, int numSamples)
 Calculate confidence interval using bootstrap. More...
 
static std::vector< double > smoothed_bootstrap (const std::vector< double > &data, int numSamples)
 Perform smoothed bootstrap resampling. More...
 
static std::vector< double > circular_block_bootstrap (const std::vector< double > &data, int blockSize, int numSamples)
 Perform circular block bootstrap resampling. More...
 
static std::vector< double > time_series_bootstrap (const std::vector< double > &data, int numSamples)
 Perform time series bootstrap resampling. More...
 
static std::vector< double > weighted_bootstrap (const std::vector< double > &data, const std::vector< double > &weights, int size)
 Perform weighted bootstrap resampling. More...
 
static double permutation_p_value (const std::vector< double > &data1, const std::vector< double > &data2, double observedStatistic)
 Calculate the p-value using permutation test. More...
 

Detailed Description

A class providing various resampling methods for statistical analysis.

Definition at line 48 of file resampling.hpp.

Member Function Documentation

◆ bootstrap()

std::vector< int > gpmp::stats::Resampling::bootstrap ( const std::vector< int > &  data,
int  numSamples 
)
static

Perform bootstrap resampling.

Parameters
data: The original data (must be non-empty)
numSamples: The number of elements to draw
Returns
A vector of numSamples values drawn uniformly at random, with replacement, from data

Definition at line 40 of file resampling.cpp.

41  {
42  std::vector<int> resampledData;
43  resampledData.reserve(numSamples);
44 
45  std::mt19937 rng(std::random_device{}());
46  std::uniform_int_distribution<int> dist(0, data.size() - 1);
47 
48  for (int i = 0; i < numSamples; ++i) {
49  int index = dist(rng);
50  resampledData.push_back(data[index]);
51  }
52 
53  return resampledData;
54 }

◆ bootstrap_ci()

std::pair< double, double > gpmp::stats::Resampling::bootstrap_ci ( const std::vector< double > &  data,
double  alpha,
int  numSamples 
)
static

Calculate confidence interval using bootstrap.

Parameters
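data: The original data
alpha: The significance level (e.g. 0.05 for a 95% interval)
numSamples: The number of bootstrap samples
Returns
A pair (lower, upper) taken from the sorted bootstrap sample means at the alpha/2 and 1 - alpha/2 positions, i.e. a percentile confidence interval for the mean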

Definition at line 137 of file resampling.cpp.

139  {
140  std::vector<double> resampledMeans = bootstrap_t(data, numSamples);
141  std::sort(resampledMeans.begin(), resampledMeans.end());
142  int lowerIndex = static_cast<int>((alpha / 2) * numSamples);
143  int upperIndex = static_cast<int>((1 - alpha / 2) * numSamples) - 1;
144  return std::make_pair(resampledMeans[lowerIndex],
145  resampledMeans[upperIndex]);
146 }
static std::vector< double > bootstrap_t(const std::vector< double > &data, int numSamples)
Perform bootstrap t-statistic resampling.
Definition: resampling.cpp:111

◆ bootstrap_t()

std::vector< double > gpmp::stats::Resampling::bootstrap_t ( const std::vector< double > &  data,
int  numSamples 
)
static

Perform bootstrap t-statistic resampling.

Parameters
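data: The original data
numSamples: The number of bootstrap samples
Returns
A vector of numSamples values, each the mean of one bootstrap resample (data.size() draws with replacement) of data; note that the implementation shown returns resample means, not studentized t-statistics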

Definition at line 111 of file resampling.cpp.

112  {
113  std::vector<double> resampledMeans;
114  resampledMeans.reserve(numSamples);
115 
116  std::mt19937 rng(std::random_device{}());
117  std::uniform_int_distribution<int> dist(0, data.size() - 1);
118 
119  for (int i = 0; i < numSamples; ++i) {
120  std::vector<double> resampledData;
121  resampledData.reserve(data.size());
122  for (int j = 0; j < static_cast<int>(data.size()); ++j) {
123  int index = dist(rng);
124  resampledData.push_back(data[index]);
125  }
126  double mean =
127  std::accumulate(resampledData.begin(), resampledData.end(), 0.0) /
128  resampledData.size();
129  resampledMeans.push_back(mean);
130  }
131 
132  return resampledMeans;
133 }

◆ circular_block_bootstrap()

std::vector< double > gpmp::stats::Resampling::circular_block_bootstrap ( const std::vector< double > &  data,
int  blockSize,
int  numSamples 
)
static

Perform circular block bootstrap resampling.

Parameters
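data: The original data
blockSize: The size of each block; must be in the range [1, data.size()]
numSamples: The number of bootstrap samples
Returns
A vector of numSamples * (data.size() / blockSize) block means, where each block mean is the average of blockSize values drawn at random from data and the block means of each sample are shuffled before being appended
Exceptions
std::invalid_argument: if blockSize is less than 1 or greater than data.size()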

Definition at line 171 of file resampling.cpp.

174  {
175 
176  if (blockSize <= 0 || blockSize > static_cast<int>(data.size())) {
177  throw std::invalid_argument("Invalid block size");
178  }
179 
180  std::vector<double> resampledData;
181  resampledData.reserve(numSamples);
182 
183  std::mt19937 rng(std::random_device{}());
184  std::uniform_int_distribution<int> dist(0, data.size() - 1);
185 
186  for (int i = 0; i < numSamples; ++i) {
187  std::vector<double> blockMeans;
188  blockMeans.reserve(data.size() / blockSize);
189  for (int j = 0; j < static_cast<int>(data.size()) / blockSize; ++j) {
190  double sum = 0.0;
191  for (int k = 0; k < blockSize; ++k) {
192  int index = dist(rng);
193  sum += data[index];
194  }
195  blockMeans.push_back(sum / blockSize);
196  }
197  std::shuffle(blockMeans.begin(), blockMeans.end(), rng);
198  resampledData.insert(resampledData.end(),
199  blockMeans.begin(),
200  blockMeans.end());
201  }
202 
203  return resampledData;
204 }

◆ jackknife()

std::vector< std::vector< int > > gpmp::stats::Resampling::jackknife ( const std::vector< int > &  data)
static

Perform jackknife resampling.

Parameters
data: The original data
Returns
A vector of data.size() leave-one-out resamples; the i-th resample is a copy of data with element i removed

Definition at line 77 of file resampling.cpp.

77  {
78  int n = data.size();
79  std::vector<std::vector<int>> resampledDatasets;
80  resampledDatasets.reserve(n);
81 
82  for (int i = 0; i < n; ++i) {
83  std::vector<int> resampledData = data;
84  resampledData.erase(resampledData.begin() + i);
85  resampledDatasets.push_back(resampledData);
86  }
87 
88  return resampledDatasets;
89 }

◆ permutation_p_value()

double gpmp::stats::Resampling::permutation_p_value ( const std::vector< double > &  data1,
const std::vector< double > &  data2,
double  observedStatistic 
)
static

Calculate the p-value using permutation test.

Parameters
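data1: The first data set
data2: The second data set
observedStatistic: The observed test statistic
Returns
The permutation p-value: the fraction of 1000 random permutations of the pooled data (data1 followed by data2) for which the mean of the first data1.size() pooled values is greater than or equal to observedStatistic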

Definition at line 247 of file resampling.cpp.

249  {
250  int count = 0;
251  std::vector<double> combinedData = data1;
252  combinedData.insert(combinedData.end(), data2.begin(), data2.end());
253  std::shuffle(combinedData.begin(),
254  combinedData.end(),
255  std::mt19937(std::random_device()()));
256  std::vector<double> permutedData1(data1.begin(), data1.end());
257  std::vector<double> permutedData2(data2.begin(), data2.end());
258  for (int i = 0; i < 1000; ++i) {
259  std::shuffle(combinedData.begin(),
260  combinedData.end(),
261  std::mt19937(std::random_device()()));
262  auto permutedStatistic =
263  (std::accumulate(combinedData.begin(),
264  combinedData.begin() + data1.size(),
265  0.0)) /
266  data1.size();
267  if (permutedStatistic >= observedStatistic) {
268  count++;
269  }
270  }
271  return count / 1000.0;
272 }

◆ permutation_test()

std::vector< std::vector< int > > gpmp::stats::Resampling::permutation_test ( const std::vector< int > &  data,
int  numPermutations 
)
static

Perform permutation test.

Parameters
data: The original data
numPermutations: The number of permutations
Returns
A vector of numPermutations permuted data sets, each a randomly shuffled copy of data

Definition at line 93 of file resampling.cpp.

94  {
95  std::vector<std::vector<int>> permutedDatasets;
96  permutedDatasets.reserve(numPermutations);
97 
98  std::mt19937 rng(std::random_device{}());
99 
100  for (int i = 0; i < numPermutations; ++i) {
101  std::vector<int> permutedData = data;
102  std::shuffle(permutedData.begin(), permutedData.end(), rng);
103  permutedDatasets.push_back(permutedData);
104  }
105 
106  return permutedDatasets;
107 }

◆ smoothed_bootstrap()

std::vector< double > gpmp::stats::Resampling::smoothed_bootstrap ( const std::vector< double > &  data,
int  numSamples 
)
static

Perform smoothed bootstrap resampling.

Parameters
data: The original data
numSamples: The number of bootstrap samples
Returns
A vector of numSamples values, each the mean of data.size() values drawn with replacement from data

Definition at line 150 of file resampling.cpp.

151  {
152  std::vector<double> resampledData;
153  resampledData.reserve(numSamples);
154 
155  std::mt19937 rng(std::random_device{}());
156  std::uniform_int_distribution<int> dist(0, data.size() - 1);
157 
158  for (int i = 0; i < numSamples; ++i) {
159  double sum = 0.0;
160  for (int j = 0; j < static_cast<int>(data.size()); ++j) {
161  int index = dist(rng);
162  sum += data[index];
163  }
164  resampledData.push_back(sum / data.size());
165  }
166 
167  return resampledData;
168 }

◆ subsample()

std::vector< int > gpmp::stats::Resampling::subsample ( const std::vector< int > &  data,
int  numSamples 
)
static

Perform subsampling.

Parameters
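data: The original data
numSamples: The number of elements to keep in the subsample; must not exceed data.size()
Returns
A vector holding the first numSamples elements of a random shuffle of data, i.e. numSamples elements drawn without replacement
Exceptions
std::invalid_argument: if numSamples is greater than data.size()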

Definition at line 58 of file resampling.cpp.

59  {
60  if (numSamples > static_cast<int>(data.size())) {
61  throw std::invalid_argument(
62  "Number of samples cannot exceed data size");
63  }
64 
65  std::vector<int> resampledData = data;
66  std::shuffle(resampledData.begin(),
67  resampledData.end(),
68  std::mt19937(std::random_device()()));
69 
70  resampledData.resize(numSamples);
71 
72  return resampledData;
73 }

◆ time_series_bootstrap()

std::vector< double > gpmp::stats::Resampling::time_series_bootstrap ( const std::vector< double > &  data,
int  numSamples 
)
static

Perform time series bootstrap resampling.

Parameters
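data: The original time series data
numSamples: The number of bootstrap samples
Returns
A vector of numSamples * data.size() values: the concatenation of numSamples copies of data, each rotated circularly so that it starts at a randomly chosen index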

Definition at line 208 of file resampling.cpp.

209  {
210  std::vector<double> resampledData;
211  resampledData.reserve(numSamples);
212 
213  std::mt19937 rng(std::random_device{}());
214  std::uniform_int_distribution<int> dist(0, data.size() - 1);
215 
216  for (int i = 0; i < numSamples; ++i) {
217  std::vector<double> resampledSequence;
218  resampledSequence.reserve(data.size());
219  int startIndex = dist(rng);
220  for (int j = 0; j < static_cast<int>(data.size()); ++j) {
221  int index = (startIndex + j) % data.size();
222  resampledSequence.push_back(data[index]);
223  }
224  resampledData.insert(resampledData.end(),
225  resampledSequence.begin(),
226  resampledSequence.end());
227  }
228 
229  return resampledData;
230 }

◆ weighted_bootstrap()

std::vector< double > gpmp::stats::Resampling::weighted_bootstrap ( const std::vector< double > &  data,
const std::vector< double > &  weights,
int  size 
)
static

Perform weighted bootstrap resampling.

Parameters
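data: The original data
weights: The weights associated with each data point; an element is drawn with probability proportional to its weight, so weights should have one entry per element of data
size: The number of elements to draw
Returns
A vector of size values drawn with replacement from data according to weights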

Definition at line 233 of file resampling.cpp.

235  {
236  std::vector<double> resampledData;
237  std::random_device rd;
238  std::mt19937 gen(rd());
239  std::discrete_distribution<> dis(weights.begin(), weights.end());
240  for (int i = 0; i < size; ++i) {
241  resampledData.push_back(data[dis(gen)]);
242  }
243  return resampledData;
244 }

The documentation for this class was generated from the following files: