Skip to content

Commit 0a873b0

Browse files
authored
Support for rank deficiency in bootstrap (#755)
* coef! method * use coef! instead of fixef! in bootstrap * ignore version specific manifests * slight efficiency tweak * fixef! with different target eltype for GLMM * simulate! tweaks * add test for rank deficient bootstrap * behavior note * version bump * NEWS update * tweak nightly runner
1 parent cfd3023 commit 0a873b0

File tree

9 files changed

+59
-35
lines changed

9 files changed

+59
-35
lines changed

.github/workflows/nightly.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ jobs:
3030
version: ${{ matrix.julia-version }}
3131
- uses: julia-actions/cache@v1
3232
with:
33-
cache-compiled: "true"
34-
- uses: julia-actions/julia-buildpkg@v1
33+
cache-compiled: true
3534
- uses: julia-actions/julia-runtest@v1
3635
env:
3736
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ docs/src/assets/*.svg
44
test/scratch.jl
55
tune.json
66
Manifest.toml
7+
Manifest-v1.*.toml
78
settings.json
89
docs/jmd
910
LocalPreferences.toml

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
MixedModels v4.23.0 Release Notes
2+
==============================
3+
* Support for rank deficiency in the parametric bootstrap. [#755]
4+
15
MixedModels v4.22.5 Release Notes
26
==============================
37
* Use `muladd` where possible to enable fused multiply-add (FMA) on architectures with hardware support. FMA will generally improve computational speed and gives more accurate rounding. [#740]
@@ -504,3 +508,4 @@ Package dependencies
504508
[#740]: https://github.com/JuliaStats/MixedModels.jl/issues/740
505509
[#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744
506510
[#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748
511+
[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "MixedModels"
22
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
33
author = ["Phillip Alday <[email protected]>", "Douglas Bates <[email protected]>", "Jose Bayoan Santiago Calderon <[email protected]>"]
4-
version = "4.22.5"
4+
version = "4.23.0"
55

66
[deps]
77
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"

src/bootstrap.jl

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ been constructed with the same structure as the source of the saved replicates.
8080
8181
The two-argument method constructs a [`MixedModelBootstrap`](@ref) with the same eltype as `m`.
8282
If an eltype is specified as the third argument, then a `MixedModelBootstrap` is returned.
83-
If a subtype of `MixedModelFitCollection` is specified as the third argument, then that
83+
If a subtype of `MixedModelFitCollection` is specified as the third argument, then that
8484
is the return type.
8585
8686
See also [`savereplicates`](@ref), [`restoreoptsum!`](@ref).
@@ -91,7 +91,7 @@ end
9191

9292
# why this weird second method? it allows us to define custom types and write methods
9393
# to load into those types directly. For example, we could define a `PowerAnalysis <: MixedModelFitCollection`
94-
# in MixedModelsSim and then overload this method to get a convenient object.
94+
# in MixedModelsSim and then overload this method to get a convenient object.
9595
# Also, this allows us to write `restorereplicateS(f, m, ::Type{<:MixedModelNonparametricBootstrap})` for
9696
# entities in MixedModels bootstrap
9797
function restorereplicates(
@@ -120,14 +120,14 @@ end
120120
"""
121121
savereplicates(f, b::MixedModelFitCollection)
122122
123-
Save the replicates associated with a [`MixedModelFitCollection`](@ref),
124-
e.g. [`MixedModelBootstrap`](@ref) as an Arrow file.
123+
Save the replicates associated with a [`MixedModelFitCollection`](@ref),
124+
e.g. [`MixedModelBootstrap`](@ref) as an Arrow file.
125125
126126
See also [`restorereplicates`](@ref), [`saveoptsum`](@ref)
127127
128128
!!! note
129129
**Only** the replicates are saved, not the entire contents of the `MixedModelFitCollection`.
130-
`restorereplicates` requires a model compatible with the bootstrap to restore the full object.
130+
`restorereplicates` requires a model compatible with the bootstrap to restore the full object.
131131
"""
132132
savereplicates(f, b::MixedModelFitCollection) = Arrow.write(f, b.fits)
133133

@@ -200,6 +200,14 @@ fit during the bootstrapping process. For example, `optsum_overrides=(;ftol_rel=
200200
reduces the convergence criterion, which can greatly speed up the bootstrap fits.
201201
Taking advantage of this speed up to increase `n` can often lead to better estimates
202202
of coverage intervals.
203+
204+
205+
!!! note
206+
All coefficients are bootstrapped. In the rank deficient case, the inestimatable coefficients are
207+
treated as -0.0 in the simulations underlying the bootstrap, which will generally result
208+
in their estimate from the simulated data also being being inestimable and thus set to -0.0.
209+
**However this behavior may change in future releases to explicitly drop the
210+
extraneous columns before simulation and thus not include their estimates in the bootstrap result.**
203211
"""
204212
function parametricbootstrap(
205213
rng::AbstractRNG,
@@ -234,7 +242,7 @@ function parametricbootstrap(
234242
# this seemed to slow things down?!
235243
# _copy_away_from_lowerbd!(m.optsum.initial, morig.optsum.final, m.lowerbd; incr=0.05)
236244

237-
β_names = Tuple(Symbol.(fixefnames(morig)))
245+
β_names = Tuple(Symbol.(coefnames(morig)))
238246

239247
use_threads && Base.depwarn(
240248
"use_threads is deprecated and will be removed in a future release",
@@ -247,7 +255,7 @@ function parametricbootstrap(
247255
(
248256
objective=ftype.(m.objective),
249257
σ=ismissing(m.σ) ? missing : ftype(m.σ),
250-
β=NamedTuple{β_names}(fixef!(βsc, m)),
258+
β=NamedTuple{β_names}(coef!(βsc, m)),
251259
se=SVector{p,ftype}(stderror!(βsc, m)),
252260
θ=SVector{k,ftype}(getθ!(θsc, m)),
253261
)
@@ -331,8 +339,8 @@ end
331339
332340
Compute bootstrap confidence intervals for coefficients and variance components, with confidence level level (by default 95%).
333341
334-
The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used
335-
or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`,
342+
The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used
343+
or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`,
336344
but `:equaltail` gives the interval that is most comparable to the profile and Wald confidence intervals.
337345
338346
!!! note
@@ -621,7 +629,7 @@ end
621629
function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T}
622630
"""
623631
σρ!(v, t, σ)
624-
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
632+
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
625633
"""
626634
dat = t.data
627635
for i in axes(dat, 1)

src/generalizedlinearmixedmodel.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ StatsAPI.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ)
113113

114114
fixef(m::GeneralizedLinearMixedModel) = m.β
115115

116-
function fixef!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where {T}
117-
return copyto!(fill!(v, -zero(T)), m.β)
116+
function fixef!(v::AbstractVector{Tv}, m::GeneralizedLinearMixedModel{T}) where {Tv,T}
117+
return copyto!(fill!(v, -zero(Tv)), m.β)
118118
end
119119

120120
objective(m::GeneralizedLinearMixedModel) = deviance(m)

src/linearmixedmodel.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,12 @@ function StatsAPI.fit(
267267
end
268268

269269
function StatsAPI.coef(m::LinearMixedModel{T}) where {T}
270+
return coef!(Vector{T}(undef, length(m.feterm.piv)), m)
271+
end
272+
273+
function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T}
270274
piv = m.feterm.piv
271-
return invpermute!(fixef!(similar(piv, T), m), piv)
275+
return invpermute!(fixef!(v, m), piv)
272276
end
273277

274278
βs(m::LinearMixedModel) = NamedTuple{(Symbol.(coefnames(m))...,)}(coef(m))

src/simulate.jl

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,6 @@ scale.
110110
The `wts` argument is currently ignored except for `GeneralizedLinearMixedModel`
111111
models with a `Binomial` distribution.
112112
113-
!!! warning
114-
Models are assumed to be full rank.
115-
116113
!!! note
117114
Note that `simulate!` methods with a `y::AbstractVector` as the first argument
118115
(besides the RNG) and `simulate` methods return the simulated response. This is
@@ -152,20 +149,17 @@ end
152149
function simulate!(
153150
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ
154151
) where {T}
155-
length(β) == length(fixef(m)) ||
156-
length(β) == length(coef(m)) ||
152+
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
157153
throw(ArgumentError("You must specify all (non-singular) βs"))
158154

159155
β = convert(Vector{T}, β)
160156
σ = T(σ)
161157
θ = convert(Vector{T}, θ)
162158
isempty(θ) || setθ!(m, θ)
163159

164-
if length(β) length(coef(m))
165-
padding = length(coef(m)) - length(β)
166-
for ii in 1:padding
167-
push!(β, -0.0)
168-
end
160+
if length(β) length(m.feterm.piv)
161+
padding = length(model.feterm.piv) - m.feterm.rank
162+
append!(β, fill(-0.0, padding))
169163
end
170164

171165
# initialize y to standard normal
@@ -234,8 +228,7 @@ function _simulate!(
234228
θ,
235229
resp=nothing,
236230
) where {T}
237-
length(β) == length(fixef(m)) ||
238-
length(β) == length(coef(m)) ||
231+
length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank ||
239232
throw(ArgumentError("You must specify all (non-singular) βs"))
240233

241234
dispersion_parameter(m) ||
@@ -254,11 +247,9 @@ function _simulate!(
254247

255248
d = m.resp.d
256249

257-
if length(β) length(coef(m))
258-
padding = length(coef(m)) - length(β)
259-
for ii in 1:padding
260-
push!(β, -0.0)
261-
end
250+
if length(β) length(m.feterm.piv)
251+
padding = length(model.feterm.piv) - m.feterm.rank
252+
append!(β, fill(-0.0, padding))
262253
end
263254

264255
fast = (length(m.θ) == length(m.optsum.final))

test/bootstrap.jl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ end
189189
@test pb1 == restorereplicates(seekstart(io), MixedModels.unfit!(deepcopy(m1)))
190190

191191
@test pb1 restorereplicates(seekstart(io), m1, Float16) rtol=1
192-
end
192+
end
193193

194194
@testset "Bernoulli simulate! and GLMM bootstrap" begin
195195
contra = dataset(:contra)
@@ -220,6 +220,22 @@ end
220220
σ=2, progress=false)
221221
@test sum(issingular(bs)) == 0
222222
end
223+
224+
@testset "Rank deficient" begin
225+
rng = MersenneTwister(0);
226+
x = rand(rng, 100);
227+
data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5))
228+
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false)
229+
boot = quickboot(model, 10)
230+
231+
dropped_idx = model.feterm.piv[end]
232+
dropped_coef = coefnames(model)[dropped_idx]
233+
@test all(boot.β) do nt
234+
# if we're the dropped coef, then we must be -0.0
235+
# need isequal because of -0.0
236+
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
237+
end
238+
end
223239
end
224240

225241
@testset "show and summary" begin
@@ -236,7 +252,7 @@ end
236252
@test nrow(df) == 151
237253
@test propertynames(df) == collect(propertynames(pr.tbl))
238254

239-
@testset "CI method comparison" begin
255+
@testset "CI method comparison" begin
240256
level = 0.68
241257
ci_boot_equaltail = confint(pb; level, method=:equaltail)
242258
ci_boot_shortest = confint(pb; level, method=:shortest)

0 commit comments

Comments
 (0)