From d9aa72ea42bff6e7731dc6f7d00b4a97e9d7c686 Mon Sep 17 00:00:00 2001 From: colintbowers Date: Sun, 28 Jul 2019 20:15:52 +1000 Subject: [PATCH] add_dep_boot --- Manifest.toml | 17 ++++++ Project.toml | 3 +- README.md | 3 +- src/Bootstrap.jl | 6 +++ src/dependent_bootstrap.jl | 86 +++++++++++++++++++++++++++++++ test/runtests.jl | 1 + test/test-bootsample-dependent.jl | 80 ++++++++++++++++++++++++++++ 7 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 src/dependent_bootstrap.jl create mode 100644 test/test-bootsample-dependent.jl diff --git a/Manifest.toml b/Manifest.toml index 76f7a17..611fa2d 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -65,6 +65,12 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" deps = ["Mmap"] uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +[[DependentBootstrap]] +deps = ["DataFrames", "Distributions", "StatsBase", "TimeSeries"] +git-tree-sha1 = "ea6f5b7fcc57ed25f2e584f17837d9f3d68919be" +uuid = "191d1da1-0f37-5779-b8ea-a655caa0c150" +version = "1.1.2" + [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -149,6 +155,11 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +[[RecipesBase]] +git-tree-sha1 = "7bdce29bc9b2f5660a6e5e64d64d91ec941f6aa2" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "0.7.0" + [[Reexport]] deps = ["Pkg"] git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" @@ -238,6 +249,12 @@ version = "0.1.18" deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +[[TimeSeries]] +deps = ["Dates", "DelimitedFiles", "RecipesBase", "Reexport", "Statistics", "Tables"] +git-tree-sha1 = "abd4bd489ed0ed88532498d1aa1d2e41d7b05485" +uuid = "9e3dc215-6440-5c97-bce1-76c03772f85e" +version = "0.16.0" + [[TranscodingStreams]] deps = ["Pkg", "Random", "Test"] git-tree-sha1 = "f42956022d8084539f1d7219f632542b0ea686ce" diff --git a/Project.toml b/Project.toml index e5bbd9c..f3f8b13 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "2.1.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DependentBootstrap = "191d1da1-0f37-5779-b8ea-a655caa0c150" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -11,8 +12,8 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] -julia = "1" StatsModels = "< 0.6.0" +julia = "1" [extras] GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" diff --git a/README.md b/README.md index 0008632..ff577ed 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Bootstrapping is a widely applicable technique for statistical estimation. deterministic bootstrap, suited for small samples sizes - Resampling of residuals in generalized linear models (`ResidualSampling`, `WildSampling`) - Maximum Entropy bootstrapping for dependent and non-stationary datasets (`MaximumEntropySampling`) + - Block bootstrapping for stationary or near-epoch-dependent time-series (`StationarySampling`, `MovingBlockSampling`, `CircularBlockSampling`, `NoOverlapBlockSampling`) - Confidence intervals: - Basic (`BasicConfInt`) @@ -70,7 +71,7 @@ resamples and with different bootstrapping approaches. ```julia using Statistics # the `std` methods live here - + n_boot = 1000 ## basic bootstrap diff --git a/src/Bootstrap.jl b/src/Bootstrap.jl index aa3f347..c69ff1f 100644 --- a/src/Bootstrap.jl +++ b/src/Bootstrap.jl @@ -13,6 +13,7 @@ using StatsBase using Distributions using DataFrames using StatsModels +using DependentBootstrap import Base: iterate, eltype, length, size import StatsBase: confint, stderror @@ -33,6 +34,10 @@ export MaximumEntropySampling, ResidualSampling, WildSampling, + StationarySampling, + MovingBlockSampling, + CircularBlockSampling, + NoOverlapBlockSampling, BootstrapSample, NonParametricBootstrapSample, ParametricBootstrapSample, @@ -57,6 +62,7 @@ include("stats.jl") include("nobs.jl") include("draw.jl") include("bootsampling.jl") +include("dependent_bootstrap.jl") include("get.jl") include("show.jl") include("confint.jl") diff --git a/src/dependent_bootstrap.jl b/src/dependent_bootstrap.jl new file mode 100644 index 0000000..31303a9 --- /dev/null +++ b/src/dependent_bootstrap.jl @@ -0,0 +1,86 @@ + +"DependentBootstrapSampling <- Abstract supertype for nesting all DependentBootstrap.jl sampling types" +abstract type DependentBootstrapSampling <: BootstrapSampling ; end + +""" +Stationary Bootstrap Sampling + +```julia +StationarySampling(1000, 5.0) +``` + +""" +struct StationarySampling <: DependentBootstrapSampling + nrun::Int + blocklength::Float64 +end +db_type(x::StationarySampling) = DependentBootstrap.BootStationary() +db_blocklength_type(x::StationarySampling) = DependentBootstrap.BLPPW2009() + +""" +Moving Block Bootstrap Sampling + +```julia +MovingBlockSampling(1000, 5.0) +``` + +""" +struct MovingBlockSampling <: DependentBootstrapSampling + nrun::Int + blocklength::Float64 +end +db_type(x::MovingBlockSampling) = DependentBootstrap.BootMoving() +db_blocklength_type(x::MovingBlockSampling) = DependentBootstrap.BLPPW2009() + + +""" +Circular Block Bootstrap Sampling + +```julia +CircularBlockSampling(1000, 5.0) +``` + +""" +struct CircularBlockSampling <: DependentBootstrapSampling + nrun::Int + blocklength::Float64 +end +db_type(x::CircularBlockSampling) = DependentBootstrap.BootCircular() +db_blocklength_type(x::CircularBlockSampling) = DependentBootstrap.BLPPW2009() + + +""" +No Overlap Block Bootstrap Sampling + +```julia +NonoverlappingBlockSampling(1000, 5.0) +``` + +""" +struct NoOverlapBlockSampling <: DependentBootstrapSampling + nrun::Int + blocklength::Float64 +end +db_type(x::NoOverlapBlockSampling) = DependentBootstrap.BootNoOverlap() +db_blocklength_type(x::NoOverlapBlockSampling) = DependentBootstrap.BLPPW2009() + +""" +bootstrap(statistic, data, BasicSampling()) +""" +function bootstrap(statistic::Function, data, sampling::T) where {T<:DependentBootstrapSampling} + t0 = tx(statistic(data)) + m = nrun(sampling) + t1 = zeros_tuple(t0, m) + bootmethod = db_type(sampling) + blocklengthmethod = db_blocklength_type(sampling) + bi = DependentBootstrap.BootInput(data, nobs(data), sampling.blocklength, m, bootmethod, + blocklengthmethod, db_dummy_func, db_dummy_func, nobs(data), median) + for i in 1:m + data1 = DependentBootstrap.dbootdata_one(data, bi) + for (j, t) in enumerate(tx(statistic(data1))) + t1[j][i] = t + end + end + return NonParametricBootstrapSample(t0, t1, statistic, data, sampling) +end +db_dummy_func() = error("Logic fail in method called on subtype of DependentBootstrapSampling. This function should never be called") diff --git a/test/runtests.jl b/test/runtests.jl index 720f03a..5874d52 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ tests = ["test-bootsample-non-parametric", "test-bootsample-parametric", + "test-bootsample-dependent", "test-utils", "test-dependencies"] diff --git a/test/test-bootsample-dependent.jl b/test/test-bootsample-dependent.jl new file mode 100644 index 0000000..23defa3 --- /dev/null +++ b/test/test-bootsample-dependent.jl @@ -0,0 +1,80 @@ +module TestBootsampleDependent + +using Statistics +using Bootstrap +using DependentBootstrap +using Random +using Test +using DataFrames + +#The primary test implemented here is to ensure the statistics generated +#by Bootstrap.bootstrap are identical to the statistics generated by +#DependentBootstrap.dbootlevel1. + +function local_df_to_vec_test_func(x::DataFrame)::Vector{Float64} + #If you change the contents of this function then test on DataFrame will fail + return [ mean(x[:,k]) for k = 1:size(x, 2) ] +end + + +@testset "Dependent bootstraps" begin + @test Bootstrap.DependentBootstrapSampling <: Bootstrap.BootstrapSampling + numresample = 10; + blocklength = 2; + methodtupbias = [(StationarySampling(numresample,blocklength),:stationary,-0.09178896729821062), + (MovingBlockSampling(numresample,blocklength),:moving,-0.033677652536413416), + (CircularBlockSampling(numresample,blocklength),:circular,-0.03855034907102495), + (NoOverlapBlockSampling(numresample,blocklength),:nooverlap,-0.061662288880307115)]; + atol = 1e-14 + #Testing a Vector{Float64} input and Float64 output + x1 = [0.9960305161608586,-0.7212881182263334,-0.34349923950301975,0.9611290799383404,-0.8259196925605968, + 0.6151944356507073,-0.43995051045510575,-1.13295801631931,1.2948258643438737,0.2578453384656824] + testfunc = std; + for (methodboot, depsym, biasresult) in methodtupbias + @test typeof(methodboot) <: Bootstrap.DependentBootstrapSampling + Random.seed!(1234) + b1 = bootstrap(testfunc, x1, methodboot) + @test length(b1.t0) == 1 + @test isapprox(b1.t0[1], testfunc(x1), atol=atol) + @test b1.statistic == testfunc + @test b1.sampling == methodboot + @test b1.data == x1 + Random.seed!(1234) + s1 = dbootlevel1(x1, numresample=numresample, blocklength=blocklength, bootmethod=depsym, flevel1=testfunc) + @test length(b1.t1) == 1 + @test length(b1.t1[1]) == length(s1) + for n = 1:length(s1) + @test isapprox(b1.t1[1][n], s1[n], atol=atol) + end + @test isapprox(bias(b1)[1],biasresult,atol=atol) + end + #Testing a DataFrame input and Vector{Float64} output + Random.seed!(1111) + x2numcol = 2 + x2 = DataFrame(hcat(x1, x1.^2)) + numresample = 10 + blocklength = 2 + testfunc = local_df_to_vec_test_func + for (methodboot, depsym, irrelevantvariable) in methodtupbias + Random.seed!(1234) + b1 = bootstrap(testfunc, x2, methodboot) + @test length(b1.t0) == x2numcol + for k = 1:x2numcol + @test isapprox(b1.t0[k], mean(x2[:,k]), atol=atol) + end + @test b1.statistic == testfunc + @test b1.sampling == methodboot + @test b1.data == x2 + Random.seed!(1234) + s1 = dbootlevel1(x2, numresample=numresample, blocklength=blocklength, bootmethod=depsym, flevel1=testfunc) + @test length(b1.t1) == x2numcol + for k = 1:x2numcol + @test length(b1.t1[k]) == length(s1) + for n = 1:length(s1) + @test isapprox(b1.t1[k][n], s1[n][k], atol=atol) + end + end + end +end + +end #module