Skip to content

Commit 6d99ac6

Browse files
authored
Blocked sparse (#102)
* Add the long d3 benchmark * Add and use BlockedSparse type * Add positive-definite checks on 2x2 case * Drop LowerTriangular wrap * Add methods for BlockedSparse * Remove LowerTriangular wrap * Tighten code * Clean up updateL! [ci skip] * Avoid d3 test pending more debugging * Avoid fit of d3 pending more debugging * Reformulate lambda products * Add tests and methods * Move rankupdate! test to UniformBlockDiagona.jl * Tighten code a tad * Try (unsuccessfully) to avoid allocations * Add tests for coverage * Tighten code * Correct defn for docs * Export more types and generics * BlockedSparse for L only * Extend test coverage * Reinstate d3 benchmark * Add test of U cholfact for coverage * Invoke the d3 benchmark * Yet another typo fixed
1 parent 5dca0d6 commit 6d99ac6

18 files changed

+335
-259
lines changed

benchmark/benchmarks.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ const mods = Dict{Symbol,Vector{Expr}}(
5050
:bs10 => [:(1+U+V+W+((1+U+V+W)|G)+((1+U+V+W)|H))],
5151
:cake => [:(1+A*B+(1|G))],
5252
:cbpp => [:(1+A+(1|G))], # Binomial glmm, create and rename variables
53-
:d3 => [:(1+U+(1|G)+(1|H)+(1|I))],
53+
:d3 => [:(1+U+(1|G)+(1|H)+(1|I)), :(1+U+(1+U|G)+(1+U|H)+(1+U|I))],
5454
:dialectNL => [:(1+A+T+U+V+W+X+(1|G)+(1|H)+(1|I))],
5555
:egsingle => [:(1+A+U+V+(1|G)+(1|H))],
5656
:epilepsy => [], # unknown origin
@@ -98,8 +98,8 @@ end
9898
end
9999

100100
@benchgroup "crossed" ["multiple", "crossed", "scalar"] begin
101-
for ds in [:Assay, :Demand, :InstEval, :Penicillin, :ScotsSec, :d3, :dialectNL,
102-
:egsingle, :paulsim]
101+
for ds in [:Assay, :Demand, :InstEval, :Penicillin, :ScotsSec, :d3,
102+
:dialectNL, :egsingle, :paulsim]
103103
for rhs in mods[ds]
104104
@bench string(ds, ':', rhs) fitbobyqa($(QuoteNode(rhs)), $(QuoteNode(ds)))
105105
end

src/MixedModels.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ using StatsFuns: log2π
88
using NamedArrays: NamedArray, setnames!
99
using Base.LinAlg: BlasFloat, BlasReal, HermOrSym, PosDefException, checksquare, copytri!
1010

11-
import Base: cor, cond, convert, eltype, full, logdet, std
11+
import Base: ReshapedArray, cor, cond, convert, eltype, full, logdet, std
1212
import Base.LinAlg: A_mul_B!, A_mul_Bc!, Ac_mul_B!, A_ldiv_B!, Ac_ldiv_B!, A_rdiv_B!, A_rdiv_Bc!
1313
import NLopt: Opt
1414
import StatsBase: coef, coeftable, dof, deviance, fit!, fitted, loglikelihood,
@@ -21,6 +21,7 @@ export
2121
Bernoulli,
2222
Binomial,
2323
Block,
24+
BlockedSparse,
2425
Gamma,
2526
LogitLink,
2627
LogLink,
@@ -42,9 +43,10 @@ export
4243
coef,
4344
coeftable,
4445
cond,
46+
describeblocks,
4547
condVar,
46-
dof,
4748
deviance,
49+
dof,
4850
fit!,
4951
fitted,
5052
fixef, # extract the fixed-effects parameter estimates

src/linalg.jl

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ function αβA_mul_Bc!(α::T, A::SparseMatrixCSC{T}, B::SparseMatrixCSC{T},
1010
arv = rowvals(A)
1111
bnz = nonzeros(B)
1212
brv = rowvals(B)
13-
if β ≠ one(T)
14-
β ≠ zero(T) ? scale!(C, β) : fill!(C, β)
15-
end
13+
β == 1 || scale!(C, β)
1614
for j = 1:A.n
1715
for ib in nzrange(B, j)
1816
αbnz = α * bnz[ib]
@@ -25,15 +23,16 @@ function αβA_mul_Bc!(α::T, A::SparseMatrixCSC{T}, B::SparseMatrixCSC{T},
2523
C
2624
end
2725

26+
αβA_mul_Bc!(α::T, A::BlockedSparse{T}, B::BlockedSparse{T}, β::T, C::Matrix{T}) where T =
27+
αβA_mul_Bc!(α, A.cscmat, B.cscmat, β, C)
28+
2829
function αβA_mul_Bc!(α::T, A::StridedVecOrMat{T}, B::SparseMatrixCSC{T}, β::T,
2930
C::StridedVecOrMat{T}) where T
3031
m, n = size(A)
3132
p, q = size(B)
3233
r, s = size(C)
3334
@argcheck(r == m && s == p && n == q, DimensionMismatch)
34-
if β ≠ one(T)
35-
iszero(β) ? fill!(C, β) : scale!(C, β)
36-
end
35+
β == 1 || scale!(C, β)
3736
nz = nonzeros(B)
3837
rv = rowvals(B)
3938
@inbounds for j in 1:q, k in nzrange(B, j)
@@ -46,6 +45,9 @@ function αβA_mul_Bc!(α::T, A::StridedVecOrMat{T}, B::SparseMatrixCSC{T}, β::
4645
C
4746
end
4847

48+
αβA_mul_Bc!(α::T, A::StridedVecOrMat{T}, B::BlockedSparse{T}, β::T,
49+
C::StridedVecOrMat{T}) where T = αβA_mul_Bc!(α, A, B.cscmat, β, C)
50+
4951
αβAc_mul_B!(α::T, A::StridedMatrix{T}, B::StridedVector{T}, β::T,
5052
C::StridedVector{T}) where {T<:BlasFloat} = BLAS.gemv!('C', α, A, B, β, C)
5153

@@ -74,9 +76,7 @@ if VERSION < v"0.7.0-DEV.586"
7476
A_rdiv_Bc!(A::StridedMatrix{T}, D::Diagonal{T}) where {T} = A_rdiv_B!(A, D)
7577

7678
function A_rdiv_Bc!(A::SparseMatrixCSC{T}, D::Diagonal{T}) where T
77-
if size(D, 2) ≠ size(A, 2)
78-
throw(DimensionMismatch("size(A,2)=$(size(A,2)) should be size(D, 1)=$(size(D,1))"))
79-
end
79+
@argcheck(size(D, 2) == size(A, 2), DimensionMismatch)
8080
dd = D.diag
8181
nonz = nonzeros(A)
8282
for j in 1:A.n
@@ -89,28 +89,22 @@ if VERSION < v"0.7.0-DEV.586"
8989
end
9090
end
9191

92-
function A_rdiv_Bc!(A::Matrix{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where {T}
93-
m, n, k = size(B.data.data)
94-
@argcheck size(A, 2) == size(B, 1) && m == n DimensionMismatch
95-
offset = 0
96-
one2m = 1:m
97-
for f in B.data.facevec
98-
BLAS.trsm!('R', 'L', 'T', 'N', one(T), f, view(A, :, one2m + offset))
99-
offset += m
92+
function A_rdiv_Bc!(A::Matrix{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where T
93+
Bd = B.data
94+
m, n, k = size(Bd.data)
95+
@argcheck(size(A, 2) == size(Bd, 1) && m == n, DimensionMismatch)
96+
inds = 1:m
97+
for f in Bd.facevec
98+
BLAS.trsm!('R', 'L', 'T', 'N', one(T), f, view(A, :, inds))
99+
inds += m
100100
end
101101
A
102102
end
103103

104-
function A_rdiv_Bc!(A::SparseMatrixCSC{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where {T}
105-
nz = nonzeros(A)
106-
offset = 0
107-
m, n, k = size(B.data.data)
108-
for f in B.data.facevec
109-
nzr = nzrange(A, offset + 1).start : nzrange(A, offset + n).stop
110-
q = div(length(nzr), m)
111-
## FIXME Still allocating 1.4 GB. Call BLAS.trsm directly
112-
A_rdiv_Bc!(unsafe_wrap(Array, pointer(nz, nzr[1]), (q, m)), LowerTriangular(f))
113-
offset += n
104+
function A_rdiv_Bc!(A::BlockedSparse{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where T
105+
@argcheck(length(A.colblocks) == length(B.data.facevec), DimensionMismatch)
106+
for (b,f) in zip(A.colblocks, B.data.facevec)
107+
A_rdiv_Bc!(b, LowerTriangular(f))
114108
end
115109
A
116110
end

src/linalg/cholUnblocked.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ function cholUnblocked!(A::StridedMatrix{T}, ::Type{Val{:L}}) where T<:BlasFloat
1919
A[1] < zero(T) && throw(PosDefException(1))
2020
A[1] = sqrt(A[1])
2121
elseif n == 2
22+
A[1] < zero(T) && throw(PosDefException(1))
2223
A[1] = sqrt(A[1])
2324
A[2] /= A[1]
24-
A[4] = sqrt(A[4] - abs2(A[2]))
25+
(A[4] -= abs2(A[2])) < zero(T) && throw(PosDefException(2))
26+
A[4] = sqrt(A[4])
2527
else
2628
_, info = LAPACK.potrf!('L', A)
27-
info ≠ 0 && throw(PosDefException(info))
29+
iszero(info) || throw(PosDefException(info))
2830
end
2931
A
3032
end

src/linalg/lambdaprods.jl

Lines changed: 44 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
"""
2-
Λc_mul_B!(A::AbstractTerm, B::AbstractArray)
32
A_mul_Λ!(A::AbstractArray, B::AbstractTerm)
43
5-
In-place products w.r.t. blocks of Λ or Λ′
4+
In-place product of `A` with a repeated block diagonal expansion of `B.Λ`
65
76
An [`AbstractTerm`](@ref) of size n×k includes a description of a lower triangular
87
k×k matrix determined by the θ parameter vector. These matrices are, at most, repeated
@@ -12,94 +11,64 @@ with a [`MatrixTerm`]{@ref} is the identity.
1211
See also [`scaleinflate!`](@ref) which performs two such multiplications plus inflation of
1312
the diagonal plus a copy! operation in one step.
1413
"""
15-
function Λc_mul_B! end
1614
function A_mul_Λ! end
17-
18-
Λc_mul_B!(A::MatrixTerm, B) = B
1915
A_mul_Λ!(A, B::MatrixTerm) = A
20-
2116
A_mul_Λ!(A, B::ScalarFactorReTerm) = scale!(A, B.Λ)
22-
Λc_mul_B!(A::ScalarFactorReTerm, B) = scale!(A.Λ, B)
23-
24-
function A_mul_Λ!(A::SparseMatrixCSC{T,S}, B::VectorFactorReTerm{T}) where {T,S}
25-
k = vsize(B)
26-
nz = nonzeros(A)
27-
λ = LowerTriangular(B.Λ)
28-
m, n = size(A)
29-
cp = A.colptr
30-
rv = rowvals(A)
31-
blkstart = 1
32-
while blkstart ≤ n
33-
i1 = nzrange(A, blkstart)
34-
r = length(i1)
35-
if (cp[blkstart + k] - cp[blkstart]) ≠ length(i1) * k
36-
throw(ArgumentError("A is not compatible with B"))
37-
end
38-
## consider using a pointer here to cut down on allocation (~ 1GB for d3 fit)
39-
a = reshape(view(nz, cp[blkstart]:(cp[blkstart + k] - 1)), (r, k))
40-
A_mul_B!(a, a, λ)
41-
blkstart += k
17+
function A_mul_Λ!(A::BlockedSparse{T}, B::VectorFactorReTerm{T}) where T
18+
λ = B.Λ
19+
for blk in A.colblocks
20+
A_mul_B!(blk, λ)
4221
end
4322
A
4423
end
45-
46-
function Λ_mul_B!(A::VectorFactorReTerm{T}, B::StridedVector{T}) where T
47-
@argcheck (k = vsize(A)) > 1
48-
λ = LowerTriangular(A.Λ)
49-
A_mul_B!(λ, reshape(B, (k, div(length(B), k))))
50-
B
51-
end
52-
53-
Λ_mul_B!(A::ScalarFactorReTerm{T}, B::StridedVecOrMat{T}) where T = scale!(B, A.Λ)
54-
55-
function A_mul_Λ!(A::Matrix{T}, B::VectorFactorReTerm{T}) where T<:AbstractFloat
56-
@argcheck (k = vsize(B)) > 1
57-
λ = LowerTriangular(B.Λ)
24+
function A_mul_Λ!(A::Matrix{T}, B::VectorFactorReTerm{T,V,R,S}) where {T,V,R,S}
25+
λ = B.Λ
5826
m, n = size(A)
59-
q, r = divrem(n, k)
60-
if r ≠ 0
61-
throw(DimensionMismatch("size(A, 2) = $n is not a multiple of size(B.λ, 1) = $k"))
62-
end
63-
offset = 0
64-
onetok = 1:k
65-
for blk in 1:q
66-
## another place where ~ 1GB is allocated in d3 fit
67-
A_mul_B!(view(A, :, onetok + offset), λ)
68-
offset += k
27+
q, r = divrem(n, S)
28+
iszero(r) || throw(DimensionMismatch("size(A, 2) = $n is not a multiple of S = $S"))
29+
A3 = reshape(A, (m, S, q))
30+
for k in 1:q
31+
A_mul_B!(view(A3, :, :, k), λ)
6932
end
7033
A
7134
end
7235

73-
Λ_mul_B!(C::AbstractArray{T}, A::ScalarFactorReTerm{T}, B::AbstractArray{T}) where T = scale!(C, A.Λ, B)
36+
"""
37+
Λc_mul_B!(A::AbstractTerm, B::AbstractArray)
38+
39+
In-place product of a repeated block diagonal expansion of `A.Λ'` with `B`
7440
75-
function Λ_mul_B!(C::StridedVecOrMat{T}, A::VectorFactorReTerm{T},
76-
B::StridedVecOrMat{T}) where T
77-
@argcheck(size(C) == size(B), DimensionMismatch)
78-
k = vsize(A)
79-
q, r = divrem(size(C, 1), k)
80-
iszero(r) || throw(ArgumentError("size(C, 1) = $(size(C,1)) is not a multiple of $k = vsize(A)"))
81-
A_mul_B!(LowerTriangular(A.Λ), reshape(copy!(C, B), (k, size(C, 2) * q)))
82-
C
41+
See also [`scaleinflate!`](@ref) which performs two such multiplications plus inflation of
42+
the diagonal plus a copy! operation in one step.
43+
"""
44+
function Λc_mul_B! end
45+
Λc_mul_B!(A::MatrixTerm, B) = B
46+
Λc_mul_B!(A::ScalarFactorReTerm, B) = scale!(A.Λ, B)
47+
function Λc_mul_B!(A::VectorFactorReTerm{T,V,R,S}, B::Matrix{T}) where {T,V,R,S}
48+
m, n = size(B)
49+
Ac_mul_B!(A.Λ, reshape(B, (S, div(m, S) * n)))
50+
B
8351
end
8452

85-
function Λc_mul_B!(A::VectorFactorReTerm{T}, B::StridedVecOrMat{T}) where T
86-
@argcheck (k = vsize(A)) > 1
87-
λ = LowerTriangular(A.Λ)
88-
m, n = size(B, 1), size(B, 2)
89-
Ac_mul_B!(λ, reshape(B, (k, div(m, k) * n)))
53+
function Λc_mul_B!(A::VectorFactorReTerm{T}, B::BlockedSparse{T}) where T
54+
Ac_mul_B!(A.Λ, B.nzsasmat)
9055
B
9156
end
9257

93-
function Λc_mul_B!(A::VectorFactorReTerm{T}, B::SparseMatrixCSC{T}) where {T}
94-
@argcheck (k = vsize(A)) > 1
95-
nz = nonzeros(B)
96-
λ = LowerTriangular(A.Λ)
97-
for j in 1:B.n
98-
## third place with over 1 GB allocation in d3 fit
99-
## probably call BLAS.trmm directly here
100-
bnz = view(nz, nzrange(B, j))
101-
mbj = reshape(bnz, (k, div(length(bnz), k)))
102-
Ac_mul_B!(mbj, λ, mbj)
103-
end
104-
B
58+
"""
59+
Λ_mul_B!(C::Matrix, A::AbstractFactorReTerm, B::Matrix)
60+
61+
Mutating product of the repeated block-diagonal expansion of `A` and `B` into `C`.
62+
This multiplication is used to convert "spherical" random effects to the original scale.
63+
"""
64+
function Λ_mul_B!(C::Matrix, A::AbstractFactorReTerm, B::Matrix) end
65+
66+
function Λ_mul_B!(C::Matrix{T}, A::ScalarFactorReTerm{T}, B::Matrix{T}) where T
67+
@argcheck(size(C) == size(B) == (1, size(A, 2)), DimensionMismatch)
68+
scale!(C, A.Λ, B)
69+
end
70+
71+
function Λ_mul_B!(C::Matrix{T}, A::VectorFactorReTerm{T,V,R,S}, B::Matrix{T}) where {T,V,R,S}
72+
@argcheck(size(C) == size(B) == (S, div(size(A, 2), S)), DimensionMismatch)
73+
A_mul_B!(A.Λ, copy!(C, B))
10574
end

src/linalg/logdet.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ function logdet(m::LinearMixedModel{T}) where {T}
3636
Ldat = m.L.data
3737
for (i, trm) in enumerate(m.trms)
3838
if isa(trm, AbstractFactorReTerm)
39-
s += LD(m.L.data[Block(i, i)])
39+
s += LD(Ldat[Block(i, i)])
4040
end
4141
end
4242
2s

src/linalg/rankUpdate.jl

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ function rankUpdate!(α::T, A::SparseMatrixCSC{T},
3434
m, n = size(A)
3535
@argcheck m == size(C, 2) && C.uplo == 'L' DimensionMismatch
3636
Cd = C.data
37-
if β ≠ one(T)
38-
scale!(LowerTriangular(Cd), β)
39-
end
37+
β == 1 || scale!(LowerTriangular(Cd), β)
4038
rv = rowvals(A)
4139
nz = nonzeros(A)
4240
@inbounds for jj in 1:n
@@ -57,10 +55,13 @@ end
5755
rankUpdate!(α::T, A::SparseMatrixCSC{T}, C::HermOrSym{T}) where {T} =
5856
rankUpdate!(α, A, one(T), C)
5957

58+
rankUpdate!(α::T, A::BlockedSparse{T}, C::HermOrSym{T}) where {T} =
59+
rankUpdate!(α, A.cscmat, one(T), C)
60+
6061
function rankUpdate!(α::T, A::SparseMatrixCSC{T}, C::Diagonal{T}) where T <: Number
6162
m, n = size(A)
6263
dd = C.diag
63-
@argcheck length(dd) == m DimensionMismatch
64+
@argcheck(length(dd) == m, DimensionMismatch)
6465
nz = nonzeros(A)
6566
rv = rowvals(A)
6667
for j in 1:n
@@ -74,26 +75,15 @@ function rankUpdate!(α::T, A::SparseMatrixCSC{T}, C::Diagonal{T}) where T <: Nu
7475
C
7576
end
7677

77-
function rankUpdate!(α::T, A::SparseMatrixCSC{T},
78-
C::HermOrSym{T,UniformBlockDiagonal{T}}) where T<:Number
79-
m, n, k = size(C.data.data)
80-
@argcheck m == n && size(A, 1) == m * k DimensionMismatch
81-
# Another expensive evaluation in terms of storage allocation
82-
aat = α * (A * A')
83-
nz = nonzeros(aat)
84-
rv = rowvals(aat)
85-
offset = 0
86-
for f in C.data.facevec
87-
for j in 1:m
88-
for i in nzrange(aat, offset + j)
89-
ii = rv[i] - offset
90-
0 < ii ≤ k || throw(ArgumentError("A*A' does not conform to B"))
91-
if ii ≥ j # update lower triangle only
92-
f[ii, j] += nz[i]
93-
end
94-
end
78+
function rankUpdate!(α::T, A::BlockedSparse{T}, C::HermOrSym{T,UniformBlockDiagonal{T}}) where T
79+
Arb = A.rowblocks
80+
Cdf = C.data.facevec
81+
(m = length(Arb)) == length(Cdf) ||
82+
throw(DimensionMismatch("length(A.rowblocks) = $m ≠ $(length(Cdf)) = length(C.data.facevec)"))
83+
for (b, d) in zip(Arb, Cdf)
84+
for v in b
85+
BLAS.syr!('L', α, v, d)
9586
end
96-
offset += m
9787
end
9888
C
9989
end

0 commit comments

Comments
 (0)