Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"

[[DependentBootstrap]]
deps = ["DataFrames", "Distributions", "StatsBase", "TimeSeries"]
git-tree-sha1 = "ea6f5b7fcc57ed25f2e584f17837d9f3d68919be"
uuid = "191d1da1-0f37-5779-b8ea-a655caa0c150"
version = "1.1.2"

[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand Down Expand Up @@ -149,6 +155,11 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[RecipesBase]]
git-tree-sha1 = "7bdce29bc9b2f5660a6e5e64d64d91ec941f6aa2"
uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
version = "0.7.0"

[[Reexport]]
deps = ["Pkg"]
git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0"
Expand Down Expand Up @@ -238,6 +249,12 @@ version = "0.1.18"
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[TimeSeries]]
deps = ["Dates", "DelimitedFiles", "RecipesBase", "Reexport", "Statistics", "Tables"]
git-tree-sha1 = "abd4bd489ed0ed88532498d1aa1d2e41d7b05485"
uuid = "9e3dc215-6440-5c97-bce1-76c03772f85e"
version = "0.16.0"

[[TranscodingStreams]]
deps = ["Pkg", "Random", "Test"]
git-tree-sha1 = "f42956022d8084539f1d7219f632542b0ea686ce"
Expand Down
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ version = "2.1.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DependentBootstrap = "191d1da1-0f37-5779-b8ea-a655caa0c150"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"

[compat]
julia = "1"
StatsModels = "< 0.6.0"
julia = "1"

[extras]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Bootstrapping is a widely applicable technique for statistical estimation.
deterministic bootstrap, suited for small samples sizes
- Resampling of residuals in generalized linear models (`ResidualSampling`, `WildSampling`)
- Maximum Entropy bootstrapping for dependent and non-stationary datasets (`MaximumEntropySampling`)
- Block bootstrapping for stationary or near-epoch-dependent time-series (`StationarySampling`, `MovingBlockSampling`, `CircularBlockSampling`, `NoOverlapBlockSampling`)

- Confidence intervals:
- Basic (`BasicConfInt`)
Expand Down Expand Up @@ -70,7 +71,7 @@ resamples and with different bootstrapping approaches.

```julia
using Statistics # the `std` methods live here

n_boot = 1000

## basic bootstrap
Expand Down
6 changes: 6 additions & 0 deletions src/Bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ using StatsBase
using Distributions
using DataFrames
using StatsModels
using DependentBootstrap

import Base: iterate, eltype, length, size
import StatsBase: confint, stderror
Expand All @@ -33,6 +34,10 @@ export
MaximumEntropySampling,
ResidualSampling,
WildSampling,
StationarySampling,
MovingBlockSampling,
CircularBlockSampling,
NoOverlapBlockSampling,
BootstrapSample,
NonParametricBootstrapSample,
ParametricBootstrapSample,
Expand All @@ -57,6 +62,7 @@ include("stats.jl")
include("nobs.jl")
include("draw.jl")
include("bootsampling.jl")
include("dependent_bootstrap.jl")
include("get.jl")
include("show.jl")
include("confint.jl")
Expand Down
86 changes: 86 additions & 0 deletions src/dependent_bootstrap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@

"DependentBootstrapSampling <- Abstract supertype for nesting all DependentBootstrap.jl sampling types"
abstract type DependentBootstrapSampling <: BootstrapSampling ; end

"""
Stationary Bootstrap Sampling

```julia
StationarySampling(1000, 5.0)
```

"""
struct StationarySampling <: DependentBootstrapSampling
nrun::Int
blocklength::Float64
end
db_type(x::StationarySampling) = DependentBootstrap.BootStationary()
db_blocklength_type(x::StationarySampling) = DependentBootstrap.BLPPW2009()

"""
Moving Block Bootstrap Sampling

```julia
MovingBlockSampling(1000, 5.0)
```

"""
struct MovingBlockSampling <: DependentBootstrapSampling
nrun::Int
blocklength::Float64
end
db_type(x::MovingBlockSampling) = DependentBootstrap.BootMoving()
db_blocklength_type(x::MovingBlockSampling) = DependentBootstrap.BLPPW2009()


"""
Circular Block Bootstrap Sampling

```julia
CircularBlockSampling(1000, 5.0)
```

"""
struct CircularBlockSampling <: DependentBootstrapSampling
nrun::Int
blocklength::Float64
end
db_type(x::CircularBlockSampling) = DependentBootstrap.BootCircular()
db_blocklength_type(x::CircularBlockSampling) = DependentBootstrap.BLPPW2009()


"""
No Overlap Block Bootstrap Sampling

```julia
NonoverlappingBlockSampling(1000, 5.0)
```

"""
struct NoOverlapBlockSampling <: DependentBootstrapSampling
nrun::Int
blocklength::Float64
end
db_type(x::NoOverlapBlockSampling) = DependentBootstrap.BootNoOverlap()
db_blocklength_type(x::NoOverlapBlockSampling) = DependentBootstrap.BLPPW2009()

"""
bootstrap(statistic, data, BasicSampling())
"""
function bootstrap(statistic::Function, data, sampling::T) where {T<:DependentBootstrapSampling}
t0 = tx(statistic(data))
m = nrun(sampling)
t1 = zeros_tuple(t0, m)
bootmethod = db_type(sampling)
blocklengthmethod = db_blocklength_type(sampling)
bi = DependentBootstrap.BootInput(data, nobs(data), sampling.blocklength, m, bootmethod,
blocklengthmethod, db_dummy_func, db_dummy_func, nobs(data), median)
for i in 1:m
data1 = DependentBootstrap.dbootdata_one(data, bi)
for (j, t) in enumerate(tx(statistic(data1)))
t1[j][i] = t
end
end
return NonParametricBootstrapSample(t0, t1, statistic, data, sampling)
end
db_dummy_func() = error("Logic fail in method called on subtype of DependentBootstrapSampling. This function should never be called")
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
tests = ["test-bootsample-non-parametric",
"test-bootsample-parametric",
"test-bootsample-dependent",
"test-utils",
"test-dependencies"]

Expand Down
80 changes: 80 additions & 0 deletions test/test-bootsample-dependent.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
module TestBootsampleDependent

using Statistics
using Bootstrap
using DependentBootstrap
using Random
using Test
using DataFrames

#The primary test implemented here is to ensure the statistics generated
#by Bootstrap.bootstrap are identical to the statistics generated by
#DependentBootstrap.dbootlevel1.

function local_df_to_vec_test_func(x::DataFrame)::Vector{Float64}
#If you change the contents of this function then test on DataFrame will fail
return [ mean(x[:,k]) for k = 1:size(x, 2) ]
end


@testset "Dependent bootstraps" begin
@test Bootstrap.DependentBootstrapSampling <: Bootstrap.BootstrapSampling
numresample = 10;
blocklength = 2;
methodtupbias = [(StationarySampling(numresample,blocklength),:stationary,-0.09178896729821062),
(MovingBlockSampling(numresample,blocklength),:moving,-0.033677652536413416),
(CircularBlockSampling(numresample,blocklength),:circular,-0.03855034907102495),
(NoOverlapBlockSampling(numresample,blocklength),:nooverlap,-0.061662288880307115)];
atol = 1e-14
#Testing a Vector{Float64} input and Float64 output
x1 = [0.9960305161608586,-0.7212881182263334,-0.34349923950301975,0.9611290799383404,-0.8259196925605968,
0.6151944356507073,-0.43995051045510575,-1.13295801631931,1.2948258643438737,0.2578453384656824]
testfunc = std;
for (methodboot, depsym, biasresult) in methodtupbias
@test typeof(methodboot) <: Bootstrap.DependentBootstrapSampling
Random.seed!(1234)
b1 = bootstrap(testfunc, x1, methodboot)
@test length(b1.t0) == 1
@test isapprox(b1.t0[1], testfunc(x1), atol=atol)
@test b1.statistic == testfunc
@test b1.sampling == methodboot
@test b1.data == x1
Random.seed!(1234)
s1 = dbootlevel1(x1, numresample=numresample, blocklength=blocklength, bootmethod=depsym, flevel1=testfunc)
@test length(b1.t1) == 1
@test length(b1.t1[1]) == length(s1)
for n = 1:length(s1)
@test isapprox(b1.t1[1][n], s1[n], atol=atol)
end
@test isapprox(bias(b1)[1],biasresult,atol=atol)
end
#Testing a DataFrame input and Vector{Float64} output
Random.seed!(1111)
x2numcol = 2
x2 = DataFrame(hcat(x1, x1.^2))
numresample = 10
blocklength = 2
testfunc = local_df_to_vec_test_func
for (methodboot, depsym, irrelevantvariable) in methodtupbias
Random.seed!(1234)
b1 = bootstrap(testfunc, x2, methodboot)
@test length(b1.t0) == x2numcol
for k = 1:x2numcol
@test isapprox(b1.t0[k], mean(x2[:,k]), atol=atol)
end
@test b1.statistic == testfunc
@test b1.sampling == methodboot
@test b1.data == x2
Random.seed!(1234)
s1 = dbootlevel1(x2, numresample=numresample, blocklength=blocklength, bootmethod=depsym, flevel1=testfunc)
@test length(b1.t1) == x2numcol
for k = 1:x2numcol
@test length(b1.t1[k]) == length(s1)
for n = 1:length(s1)
@test isapprox(b1.t1[k][n], s1[n][k], atol=atol)
end
end
end
end

end #module