Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
4bf2fce
remove the bijectors extension and relevant tests, benchmarks
Red-Portal Nov 21, 2025
9dc43bb
fix remove use of Bijectors in tests
Red-Portal Nov 21, 2025
9479ed4
fix typo in HISTORY
Red-Portal Nov 21, 2025
0142a82
bump AdvancedVI version
Red-Portal Nov 21, 2025
bb47ced
run formatter
Red-Portal Nov 21, 2025
6fa2bc3
run formatter
Red-Portal Nov 21, 2025
d6157f1
Merge branch 'remove_bijectors' of github.com:TuringLang/AdvancedVI.j…
Red-Portal Nov 21, 2025
e201345
fix removed include to removed file
Red-Portal Nov 21, 2025
66a22a5
fix remove erroenous references to bijector
Red-Portal Nov 21, 2025
f5f7fcb
fix missing comma
Red-Portal Nov 21, 2025
7a6e902
fix remove include to removed file
Red-Portal Nov 21, 2025
2a1755a
update READMe
Red-Portal Nov 21, 2025
f23eb7a
fix move benchmark model to main file
Red-Portal Nov 21, 2025
4d8d95e
update docs to the new recommended use of Bijectors
Red-Portal Nov 22, 2025
fba3d9f
run formatter
Red-Portal Nov 22, 2025
4daaa79
run formatter
Red-Portal Nov 22, 2025
a0f13d5
run formatter
Red-Portal Nov 22, 2025
9a64db7
run formatter
Red-Portal Nov 22, 2025
048a310
add constraint tutorial
Red-Portal Nov 22, 2025
69ae57a
fix missing import in docs
Red-Portal Nov 22, 2025
af4ad18
run furmatter to constrained
Red-Portal Nov 22, 2025
0386029
Merge branch 'remove_bijectors' of github.com:TuringLang/AdvancedVI.j…
Red-Portal Nov 22, 2025
f9d7f0b
fix typo
Red-Portal Nov 22, 2025
d6dede2
fix bins in normalizing flow tutorial
Red-Portal Nov 22, 2025
c7e8c44
fix formatting
Red-Portal Nov 22, 2025
373abdd
run formatter
Red-Portal Nov 22, 2025
6a5c7ba
revert changes to documentation
Red-Portal Nov 22, 2025
c05e2f5
Merge branch 'remove_bijectors' of github.com:TuringLang/AdvancedVI.j…
Red-Portal Nov 22, 2025
8c657e0
revert changes to the README
Red-Portal Nov 22, 2025
92f5077
Revert "revert changes to the README"
Red-Portal Nov 25, 2025
f81398a
Revert "revert changes to documentation"
Red-Portal Nov 25, 2025
6bf17b3
run formatter
Red-Portal Nov 25, 2025
8c729c5
fix use more sophisticated `TransformedLogDensityProblem`
Red-Portal Nov 25, 2025
db15461
fix wrong type of markdown env
Red-Portal Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# Release 0.7

## Removal of special treatment to `Bijectors.TransformedDistribution`

Previously, `KLMinRepGradDescent`, `KLMinRepGradProxDescent`, `KLMinScoreGradDescent` only required the support of the target log-density problem to match that of `q`.
This was implemented by giving a special treatment to `q <: Bijectors.TransformedDistribution` through the `Bijectors` extension.
This, however, multiplied the maintenance complexity of the relevant code.
Since this is not the only way to deal with constrained supports, the `Bijectors` extension has now been removed.
In addition, `KLMinRepGradDescent`, `KLMinRepGradProxDescent`, `KLMinScoreGradDescent` now expect an unconstrained target log-density problem.
Instead, a tutorial has been added to the documentation on how to deal with a target log-density problem with constrained support.

# Release 0.6

## New Algorithms
Expand Down
6 changes: 1 addition & 5 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "AdvancedVI"
uuid = "b5ca4192-6429-45e5-a2d9-87aec30a685c"
version = "0.6"
version = "0.7"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Expand All @@ -20,21 +20,18 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[weakdeps]
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"

[extensions]
AdvancedVIBijectorsExt = ["Bijectors", "Optimisers"]
AdvancedVIEnzymeExt = ["Enzyme", "ChainRulesCore"]
AdvancedVIMooncakeExt = ["Mooncake", "ChainRulesCore"]
AdvancedVIReverseDiffExt = ["ReverseDiff", "ChainRulesCore"]

[compat]
ADTypes = "1"
Accessors = "0.1"
Bijectors = "0.13, 0.14, 0.15"
ChainRulesCore = "1"
DiffResults = "1"
DifferentiationInterface = "0.6, 0.7"
Expand All @@ -54,7 +51,6 @@ StatsBase = "0.32, 0.33, 0.34"
julia = "1.10, 1.11.2"

[extras]
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Expand Down
77 changes: 57 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
[![Tests](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Tests.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Tests.yml/badge.svg?branch=main)
[![Coverage](https://codecov.io/gh/TuringLang/AdvancedVI.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/TuringLang/AdvancedVI.jl)

| AD Backend | Integration Status |
| ------------- | ------------- |
| [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) | [![ForwardDiff](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ForwardDiff.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ForwardDiff.yml?query=branch%3Amain) |
| [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl) | [![ReverseDiff](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ReverseDiff.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ReverseDiff.yml?query=branch%3Amain) |
| [Zygote](https://github.com/FluxML/Zygote.jl) | [![Zygote](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Zygote.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Zygote.yml?query=branch%3Amain) |
| [Mooncake](https://github.com/chalk-lab/Mooncake.jl) | [![Mooncake](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Mooncake.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Mooncake.yml?query=branch%3Amain) |
| [Enzyme](https://github.com/EnzymeAD/Enzyme.jl) | [![Enzyme](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Enzyme.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Enzyme.yml?query=branch%3Amain) |
| AD Backend | Integration Status |
|:---------------------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) | [![ForwardDiff](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ForwardDiff.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ForwardDiff.yml?query=branch%3Amain) |
| [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl) | [![ReverseDiff](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ReverseDiff.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/ReverseDiff.yml?query=branch%3Amain) |
| [Zygote](https://github.com/FluxML/Zygote.jl) | [![Zygote](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Zygote.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Zygote.yml?query=branch%3Amain) |
| [Mooncake](https://github.com/chalk-lab/Mooncake.jl) | [![Mooncake](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Mooncake.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Mooncake.yml?query=branch%3Amain) |
| [Enzyme](https://github.com/EnzymeAD/Enzyme.jl) | [![Enzyme](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Enzyme.yml/badge.svg?branch=main)](https://github.com/TuringLang/AdvancedVI.jl/actions/workflows/Enzyme.yml?query=branch%3Amain) |

# AdvancedVI.jl

Expand Down Expand Up @@ -69,7 +69,7 @@ end;

Since the support of `σ` is constrained to be positive and most VI algorithms assume an unconstrained Euclidean support, we need to use a *bijector* to transform `θ`.
We will use [`Bijectors`](https://github.com/TuringLang/Bijectors.jl) for this purpose.
This corresponds to the automatic differentiation variational inference (ADVI) formulation[^KTRGB2017].
The bijector corresponding to the joint support of our model can be constructed as follows:

```julia
using Bijectors: Bijectors
Expand All @@ -85,6 +85,41 @@ end;

A simpler approach would be to use [`Turing`](https://github.com/TuringLang/Turing.jl), where a `Turing.Model` can automatically be converted into a `LogDensityProblem` and a corresponding `bijector` is automatically generated.

Since most VI algorithms assume that the posterior is unconstrained, we will apply a change-of-variable to our model to make it unconstrained.
This amounts to wrapping it into a `LogDensityProblem` that applies the transformation and the corresponding Jacobian adjustment.

```julia
# Wraps a target log-density problem together with the inverse bijector that
# maps unconstrained (transformed) values back onto the problem's support.
struct TransformedLogDensityProblem{Prob,BInv}
    prob::Prob # original (constrained) log-density problem
    binv::BInv # inverse bijector: unconstrained space -> original support
end
Comment on lines +91 to +95
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this code is always the same, it seems like it might be worth putting in a library somewhere... although AdvancedVI isn't the right place for it... maybe Bijectors?

Copy link
Member

@penelopeysm penelopeysm Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, you might already be aware, but there is an infamously pathological DynamicPPL edge case with something like this:

@model function f()
    x ~ Normal()
    y ~ truncated(Normal(); lower = x)
end

(of course you don't need DynamicPPL to make a logdensityfunction like that, you could construct it with plain logpdf). The issue is that the bijector cannot be statically constructed, because the bijector for y will depend on the value of x. It's still a 1 to 1 function since the inputs fully determine the bijector which fully determines the output. But I think it might not fit nicely into the structure above. Probably you have to do the hardcoded logjac calculation way.

Copy link
Member Author

@Red-Portal Red-Portal Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this code is always the same, it seems like it might be worth putting in a library somewhere... although AdvancedVI isn't the right place for it... maybe Bijectors?

I think TransformedLogDensities.jl was supposed to serve this purpose, but not sure why it never decided to work Bijectors.

BTW, you might already be aware, but there is an infamously pathological DynamicPPL edge case with something like this:

How are the MCMC algorithms handling this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turing doesn't have a single bijector per model, instead there's one bijector per variable, which is constructed at runtime depending on the distribution on the rhs of tilde.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Then I guess this is going to be a known issue for AdvancedVI.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's an unsolvable problem though. I feel like if anything it should be fairly easy to support (don't hardcode a single bijector but rather use LogDensityFunction with linking).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Not in this PR of course)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(don't hardcode a single bijector but rather use LogDensityFunction with linking).

That's what I am planning to do (in fact already implemented in the open PR downstream) for the Turing interface. But the problem is that the output q needs to be wrapped with a Bijectors.TransformedDistribution. I don't see any obvious way around this.


# Build a transformed problem from `prob`: derive the problem's bijector and
# cache its inverse, which is what the log-density evaluation needs.
function TransformedLogDensityProblem(prob)
    binv = Bijectors.inverse(Bijectors.bijector(prob))
    return TransformedLogDensityProblem{typeof(prob),typeof(binv)}(prob, binv)
end

# Log-density in the unconstrained space: pull the point back to the original
# support through the cached inverse bijector and add the log-Jacobian term.
function LogDensityProblems.logdensity(prob_trans::TransformedLogDensityProblem, θ_trans)
    θ, logabsdetjac = Bijectors.with_logabsdet_jacobian(prob_trans.binv, θ_trans)
    return LogDensityProblems.logdensity(prob_trans.prob, θ) + logabsdetjac
end

# Dimension of the unconstrained representation: the output size of the
# forward bijector applied to the original problem's dimension.
function LogDensityProblems.dimension(prob_trans::TransformedLogDensityProblem)
    forward = Bijectors.inverse(prob_trans.binv)
    dim = LogDensityProblems.dimension(prob_trans.prob)
    return prod(Bijectors.output_size(forward, (dim,)))
end

# The transformed problem advertises the same LogDensityProblems capabilities
# (derivative order) as the wrapped problem type `Prob`.
function LogDensityProblems.capabilities(
    ::Type{TransformedLogDensityProblem{Prob,BInv}}
) where {Prob,BInv}
    return LogDensityProblems.capabilities(Prob)
end;
```

For the dataset, we will use the popular [sonar classification dataset](https://archive.ics.uci.edu/dataset/151/connectionist+bench+sonar+mines+vs+rocks) from the UCI repository.
This can be automatically downloaded using [`OpenML`](https://github.com/JuliaAI/OpenML.jl).
The sonar dataset corresponds to the dataset id 40.
Expand All @@ -109,7 +144,8 @@ X = hcat(X, ones(size(X, 1)));
The model can now be instantiated as follows:

```julia
model = LogReg(X, y);
prob = LogReg(X, y);
prob_trans = TransformedLogDensityProblem(prob)
```

For the VI algorithm, we will use `KLMinRepGradDescent`:
Expand All @@ -136,37 +172,38 @@ For this, it is straightforward to use `LogDensityProblemsAD`:
using DifferentiationInterface: DifferentiationInterface
using LogDensityProblemsAD: LogDensityProblemsAD

model_ad = LogDensityProblemsAD.ADgradient(ADTypes.AutoReverseDiff(), model);
prob_trans_ad = LogDensityProblemsAD.ADgradient(ADTypes.AutoReverseDiff(), prob_trans);
```

For the variational family, we will consider a `FullRankGaussian` approximation:

```julia
using LinearAlgebra

d = LogDensityProblems.dimension(model_ad)
q = FullRankGaussian(zeros(d), LowerTriangular(Matrix{Float64}(0.37*I, d, d)))
d = LogDensityProblems.dimension(prob_trans_ad)
q = FullRankGaussian(zeros(d), LowerTriangular(Matrix{Float64}(0.6*I, d, d)))
q = MeanFieldGaussian(zeros(d), Diagonal(ones(d)));
```

The bijector can now be applied to `q` to match the support of the target problem.
We can now run VI:

```julia
b = Bijectors.bijector(model)
binv = Bijectors.inverse(b)
q_transformed = Bijectors.TransformedDistribution(q, binv);
max_iter = 10^3
q_opt, info, _ = AdvancedVI.optimize(alg, max_iter, prob_trans_ad, q);
```

We can now run VI:
Recall that we applied a change-of-variable to the posterior to make it unconstrained.
This, however, is not the original constrained posterior that we wanted to approximate.
Therefore, we finally need to apply a change-of-variable to `q_opt` to make it approximate our original problem.

```julia
max_iter = 10^3
q, info, _ = AdvancedVI.optimize(alg, max_iter, model_ad, q_transformed;);
b = Bijectors.bijector(prob)
binv = Bijectors.inverse(b)
q_trans = Bijectors.TransformedDistribution(q_opt, binv)
```

For more examples and details, please refer to the documentation.

[^TL2014]: Titsias, M., & Lázaro-Gredilla, M. (2014, June). Doubly stochastic variational Bayes for non-conjugate inference. In *International Conference on Machine Learning*. PMLR.
[^RMW2014]: Rezende, D. J., Mohamed, S., & Wierstra, D. (2014, June). Stochastic backpropagation and approximate inference in deep generative models. In *International Conference on Machine Learning*. PMLR.
[^KW2014]: Kingma, D. P., & Welling, M. (2014). Auto-encoding variational bayes. In *International Conference on Learning Representations*.
[^KTRGB2017]: Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A., & Blei, D. M. (2017). Automatic differentiation variational inference. *Journal of machine learning research*.
4 changes: 1 addition & 3 deletions bench/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
AdvancedVI = "b5ca4192-6429-45e5-a2d9-87aec30a685c"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
Expand All @@ -20,9 +19,8 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
ADTypes = "1"
AdvancedVI = "0.6"
AdvancedVI = "0.7"
BenchmarkTools = "1"
Bijectors = "0.13, 0.14, 0.15"
Distributions = "0.25.111"
DistributionsAD = "0.6"
Enzyme = "0.13.7"
Expand Down
40 changes: 30 additions & 10 deletions bench/benchmarks.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using ADTypes
using AdvancedVI
using BenchmarkTools
using Bijectors
using Distributions
using DistributionsAD
using Enzyme, ForwardDiff, ReverseDiff, Zygote, Mooncake
Expand All @@ -17,8 +16,34 @@ BLAS.set_num_threads(min(4, Threads.nthreads()))
@info sprint(versioninfo)
@info "BLAS threads: $(BLAS.get_num_threads())"

include("normallognormal.jl")
include("unconstrdist.jl")
struct Dist{D<:ContinuousMultivariateDistribution}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The content of unconstrdist.jl have been moved here.

dist::D
end

# The log-density of a `Dist` is simply the log-pdf of the wrapped distribution.
LogDensityProblems.logdensity(model::Dist, x) = logpdf(model.dist, x)

# Return the log-density together with its gradient, computed via ForwardDiff.
function LogDensityProblems.logdensity_and_gradient(model::Dist, θ)
    logdens = LogDensityProblems.logdensity(model, θ)
    grad = ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ)
    return (logdens, grad)
end

# The problem dimension equals the length of the wrapped distribution.
LogDensityProblems.dimension(model::Dist) = length(model.dist)

# `Dist` supports plain log-density evaluation only (derivative order 0).
LogDensityProblems.capabilities(::Type{<:Dist}) = LogDensityProblems.LogDensityOrder{0}()

# Benchmark target: an isotropic multivariate normal with mean 5 in every
# coordinate and unit variances, wrapped as a `Dist` log-density problem.
function normal(; n_dims=10, realtype=Float64)
    mean_vec = fill(realtype(5), n_dims)
    cov_mat = Diagonal(ones(realtype, n_dims))
    return Dist(MvNormal(mean_vec, cov_mat))
end

const SUITES = BenchmarkGroup()

Expand All @@ -33,10 +58,7 @@ end
begin
T = Float64

for (probname, prob) in [
("normal + bijector", normallognormal(; n_dims=10, realtype=T))
("normal", normal(; n_dims=10, realtype=T))
]
for (probname, prob) in [("normal", normal(; n_dims=10, realtype=T))]
max_iter = 10^4
d = LogDensityProblems.dimension(prob)
opt = Optimisers.Adam(T(1e-3))
Expand All @@ -59,9 +81,7 @@ begin
),
]

b = Bijectors.bijector(prob)
binv = inverse(b)
q = Bijectors.TransformedDistribution(family, binv)
q = family
alg = KLMinRepGradDescent(adtype; optimizer=opt, entropy, operator=ClipScale())

SUITES[probname][objname][familyname][adname] = begin
Expand Down
44 changes: 0 additions & 44 deletions bench/normallognormal.jl

This file was deleted.

33 changes: 0 additions & 33 deletions bench/unconstrdist.jl

This file was deleted.

2 changes: 1 addition & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
[compat]
ADTypes = "1"
Accessors = "0.1"
AdvancedVI = "0.6"
AdvancedVI = "0.7"
Bijectors = "0.13.6, 0.14, 0.15"
DataFrames = "1"
DifferentiationInterface = "0.7"
Expand Down
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ makedocs(;
"Scaling to Large Datasets" => "tutorials/subsampling.md",
"Stan Models" => "tutorials/stan.md",
"Normalizing Flows" => "tutorials/flows.md",
"Dealing with Constrained Posteriors" => "tutorials/constrained.md",
],
"Algorithms" => [
"`KLMinRepGradDescent`" => "klminrepgraddescent.md",
Expand Down
Loading
Loading