LCOV - code coverage report
Current view: top level - modules/stats - resampling.cpp (source / functions) Hit Total Coverage
Test: lcov.info Lines: 0 145 0.0 %
Date: 2024-05-13 05:06:06 Functions: 0 11 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*************************************************************************
       2             :  *
       3             :  *  Project
       4             :  *                         _____ _____  __  __ _____
       5             :  *                        / ____|  __ \|  \/  |  __ \
       6             :  *  ___  _ __   ___ _ __ | |  __| |__) | \  / | |__) |
       7             :  * / _ \| '_ \ / _ \ '_ \| | |_ |  ___/| |\/| |  ___/
       8             :  *| (_) | |_) |  __/ | | | |__| | |    | |  | | |
       9             :  * \___/| .__/ \___|_| |_|\_____|_|    |_|  |_|_|
      10             :  *      | |
      11             :  *      |_|
      12             :  *
      13             :  * Copyright (C) Akiel Aries, <akiel@akiel.org>, et al.
      14             :  *
      15             :  * This software is licensed as described in the file LICENSE, which
      16             :  * you should have received as part of this distribution. The terms
      17             :  * among other details are referenced in the official documentation
      18             :  * seen here : https://akielaries.github.io/openGPMP/ along with
      19             :  * important files seen in this project.
      20             :  *
      21             :  * You may opt to use, copy, modify, merge, publish, distribute
      22             :  * and/or sell copies of the Software, and permit persons to whom
      23             :  * the Software is furnished to do so, under the terms of the
      24             :  * LICENSE file.
      25             :  *
      26             :  *
      27             :  *
      28             :  * This software is distributed on an AS IS basis, WITHOUT
      29             :  * WARRANTY OF ANY KIND, either express or implied.
      30             :  *
      31             :  ************************************************************************/
      32             : #include <algorithm>
      33             : #include <openGPMP/stats/resampling.hpp>
      34             : #include <random>
      35             : #include <stdexcept>
      36             : #include <vector>
      37             : 
      38             : // Resample with replacement
      39             : std::vector<int>
      40           0 : gpmp::stats::Resampling::bootstrap(const std::vector<int> &data,
      41             :                                    int numSamples) {
      42           0 :     std::vector<int> resampledData;
      43           0 :     resampledData.reserve(numSamples);
      44             : 
      45           0 :     std::mt19937 rng(std::random_device{}());
      46           0 :     std::uniform_int_distribution<int> dist(0, data.size() - 1);
      47             : 
      48           0 :     for (int i = 0; i < numSamples; ++i) {
      49           0 :         int index = dist(rng);
      50           0 :         resampledData.push_back(data[index]);
      51             :     }
      52             : 
      53           0 :     return resampledData;
      54           0 : }
      55             : 
      56             : // Resample without replacement
      57             : std::vector<int>
      58           0 : gpmp::stats::Resampling::subsample(const std::vector<int> &data,
      59             :                                    int numSamples) {
      60           0 :     if (numSamples > static_cast<int>(data.size())) {
      61           0 :         throw std::invalid_argument(
      62           0 :             "Number of samples cannot exceed data size");
      63             :     }
      64             : 
      65           0 :     std::vector<int> resampledData = data;
      66           0 :     std::shuffle(resampledData.begin(),
      67             :                  resampledData.end(),
      68           0 :                  std::mt19937(std::random_device()()));
      69             : 
      70           0 :     resampledData.resize(numSamples);
      71             : 
      72           0 :     return resampledData;
      73           0 : }
      74             : 
      75             : // Jackknife resampling
      76             : std::vector<std::vector<int>>
      77           0 : gpmp::stats::Resampling::jackknife(const std::vector<int> &data) {
      78           0 :     int n = data.size();
      79           0 :     std::vector<std::vector<int>> resampledDatasets;
      80           0 :     resampledDatasets.reserve(n);
      81             : 
      82           0 :     for (int i = 0; i < n; ++i) {
      83           0 :         std::vector<int> resampledData = data;
      84           0 :         resampledData.erase(resampledData.begin() + i);
      85           0 :         resampledDatasets.push_back(resampledData);
      86           0 :     }
      87             : 
      88           0 :     return resampledDatasets;
      89           0 : }
      90             : 
      91             : // Permutation test
      92             : std::vector<std::vector<int>>
      93           0 : gpmp::stats::Resampling::permutation_test(const std::vector<int> &data,
      94             :                                           int numPermutations) {
      95           0 :     std::vector<std::vector<int>> permutedDatasets;
      96           0 :     permutedDatasets.reserve(numPermutations);
      97             : 
      98           0 :     std::mt19937 rng(std::random_device{}());
      99             : 
     100           0 :     for (int i = 0; i < numPermutations; ++i) {
     101           0 :         std::vector<int> permutedData = data;
     102           0 :         std::shuffle(permutedData.begin(), permutedData.end(), rng);
     103           0 :         permutedDatasets.push_back(permutedData);
     104           0 :     }
     105             : 
     106           0 :     return permutedDatasets;
     107           0 : }
     108             : 
     109             : // Bootstrap-t method
     110             : std::vector<double>
     111           0 : gpmp::stats::Resampling::bootstrap_t(const std::vector<double> &data,
     112             :                                      int numSamples) {
     113           0 :     std::vector<double> resampledMeans;
     114           0 :     resampledMeans.reserve(numSamples);
     115             : 
     116           0 :     std::mt19937 rng(std::random_device{}());
     117           0 :     std::uniform_int_distribution<int> dist(0, data.size() - 1);
     118             : 
     119           0 :     for (int i = 0; i < numSamples; ++i) {
     120           0 :         std::vector<double> resampledData;
     121           0 :         resampledData.reserve(data.size());
     122           0 :         for (int j = 0; j < static_cast<int>(data.size()); ++j) {
     123           0 :             int index = dist(rng);
     124           0 :             resampledData.push_back(data[index]);
     125             :         }
     126             :         double mean =
     127           0 :             std::accumulate(resampledData.begin(), resampledData.end(), 0.0) /
     128           0 :             resampledData.size();
     129           0 :         resampledMeans.push_back(mean);
     130           0 :     }
     131             : 
     132           0 :     return resampledMeans;
     133           0 : }
     134             : 
     135             : // Bootstrapped confidence interval
     136             : std::pair<double, double>
     137           0 : gpmp::stats::Resampling::bootstrap_ci(const std::vector<double> &data,
     138             :                                       double alpha,
     139             :                                       int numSamples) {
     140           0 :     std::vector<double> resampledMeans = bootstrap_t(data, numSamples);
     141           0 :     std::sort(resampledMeans.begin(), resampledMeans.end());
     142           0 :     int lowerIndex = static_cast<int>((alpha / 2) * numSamples);
     143           0 :     int upperIndex = static_cast<int>((1 - alpha / 2) * numSamples) - 1;
     144           0 :     return std::make_pair(resampledMeans[lowerIndex],
     145           0 :                           resampledMeans[upperIndex]);
     146           0 : }
     147             : 
     148             : // Smoothed bootstrap
     149             : std::vector<double>
     150           0 : gpmp::stats::Resampling::smoothed_bootstrap(const std::vector<double> &data,
     151             :                                             int numSamples) {
     152           0 :     std::vector<double> resampledData;
     153           0 :     resampledData.reserve(numSamples);
     154             : 
     155           0 :     std::mt19937 rng(std::random_device{}());
     156           0 :     std::uniform_int_distribution<int> dist(0, data.size() - 1);
     157             : 
     158           0 :     for (int i = 0; i < numSamples; ++i) {
     159           0 :         double sum = 0.0;
     160           0 :         for (int j = 0; j < static_cast<int>(data.size()); ++j) {
     161           0 :             int index = dist(rng);
     162           0 :             sum += data[index];
     163             :         }
     164           0 :         resampledData.push_back(sum / data.size());
     165             :     }
     166             : 
     167           0 :     return resampledData;
     168           0 : }
     169             : 
     170             : // Circular block bootstrap
     171           0 : std::vector<double> gpmp::stats::Resampling::circular_block_bootstrap(
     172             :     const std::vector<double> &data,
     173             :     int blockSize,
     174             :     int numSamples) {
     175             : 
     176           0 :     if (blockSize <= 0 || blockSize > static_cast<int>(data.size())) {
     177           0 :         throw std::invalid_argument("Invalid block size");
     178             :     }
     179             : 
     180           0 :     std::vector<double> resampledData;
     181           0 :     resampledData.reserve(numSamples);
     182             : 
     183           0 :     std::mt19937 rng(std::random_device{}());
     184           0 :     std::uniform_int_distribution<int> dist(0, data.size() - 1);
     185             : 
     186           0 :     for (int i = 0; i < numSamples; ++i) {
     187           0 :         std::vector<double> blockMeans;
     188           0 :         blockMeans.reserve(data.size() / blockSize);
     189           0 :         for (int j = 0; j < static_cast<int>(data.size()) / blockSize; ++j) {
     190           0 :             double sum = 0.0;
     191           0 :             for (int k = 0; k < blockSize; ++k) {
     192           0 :                 int index = dist(rng);
     193           0 :                 sum += data[index];
     194             :             }
     195           0 :             blockMeans.push_back(sum / blockSize);
     196             :         }
     197           0 :         std::shuffle(blockMeans.begin(), blockMeans.end(), rng);
     198           0 :         resampledData.insert(resampledData.end(),
     199             :                              blockMeans.begin(),
     200             :                              blockMeans.end());
     201           0 :     }
     202             : 
     203           0 :     return resampledData;
     204           0 : }
     205             : 
     206             : // Time series bootstrap
     207             : std::vector<double>
     208           0 : gpmp::stats::Resampling::time_series_bootstrap(const std::vector<double> &data,
     209             :                                                int numSamples) {
     210           0 :     std::vector<double> resampledData;
     211           0 :     resampledData.reserve(numSamples);
     212             : 
     213           0 :     std::mt19937 rng(std::random_device{}());
     214           0 :     std::uniform_int_distribution<int> dist(0, data.size() - 1);
     215             : 
     216           0 :     for (int i = 0; i < numSamples; ++i) {
     217           0 :         std::vector<double> resampledSequence;
     218           0 :         resampledSequence.reserve(data.size());
     219           0 :         int startIndex = dist(rng);
     220           0 :         for (int j = 0; j < static_cast<int>(data.size()); ++j) {
     221           0 :             int index = (startIndex + j) % data.size();
     222           0 :             resampledSequence.push_back(data[index]);
     223             :         }
     224           0 :         resampledData.insert(resampledData.end(),
     225             :                              resampledSequence.begin(),
     226             :                              resampledSequence.end());
     227           0 :     }
     228             : 
     229           0 :     return resampledData;
     230           0 : }
     231             : 
     232             : std::vector<double>
     233           0 : gpmp::stats::Resampling::weighted_bootstrap(const std::vector<double> &data,
     234             :                                             const std::vector<double> &weights,
     235             :                                             int size) {
     236           0 :     std::vector<double> resampledData;
     237           0 :     std::random_device rd;
     238           0 :     std::mt19937 gen(rd());
     239           0 :     std::discrete_distribution<> dis(weights.begin(), weights.end());
     240           0 :     for (int i = 0; i < size; ++i) {
     241           0 :         resampledData.push_back(data[dis(gen)]);
     242             :     }
     243           0 :     return resampledData;
     244           0 : }
     245             : 
     246             : double
     247           0 : gpmp::stats::Resampling::permutation_p_value(const std::vector<double> &data1,
     248             :                                              const std::vector<double> &data2,
     249             :                                              double observedStatistic) {
     250           0 :     int count = 0;
     251           0 :     std::vector<double> combinedData = data1;
     252           0 :     combinedData.insert(combinedData.end(), data2.begin(), data2.end());
     253           0 :     std::shuffle(combinedData.begin(),
     254             :                  combinedData.end(),
     255           0 :                  std::mt19937(std::random_device()()));
     256           0 :     std::vector<double> permutedData1(data1.begin(), data1.end());
     257           0 :     std::vector<double> permutedData2(data2.begin(), data2.end());
     258           0 :     for (int i = 0; i < 1000; ++i) {
     259           0 :         std::shuffle(combinedData.begin(),
     260             :                      combinedData.end(),
     261           0 :                      std::mt19937(std::random_device()()));
     262             :         auto permutedStatistic =
     263           0 :             (std::accumulate(combinedData.begin(),
     264           0 :                              combinedData.begin() + data1.size(),
     265           0 :                              0.0)) /
     266           0 :             data1.size();
     267           0 :         if (permutedStatistic >= observedStatistic) {
     268           0 :             count++;
     269             :         }
     270             :     }
     271           0 :     return count / 1000.0;
     272           0 : }

Generated by: LCOV version 1.14