From 4c3b8698233153ec6d1541980383188884111eb8 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Fri, 10 Feb 2023 14:05:09 +0100 Subject: [PATCH 1/9] Add MvNormal sampling consistency test --- test/mvnormal.jl | 74 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/test/mvnormal.jl b/test/mvnormal.jl index 5b892870..09f3fb23 100644 --- a/test/mvnormal.jl +++ b/test/mvnormal.jl @@ -47,24 +47,64 @@ include("test_utils.jl") end @testset "MvNormal{T,Pathfinder.WoodburyPDMat{T}}" begin - n = 10 - ndraws = 20 - nhist = 4 - A = rand_pd_diag_mat(Float64, 10) - D = rand_pd_mat(Float64, 2nhist) - B = randn(n, 2nhist) - Σ = Pathfinder.WoodburyPDMat(A, B, D) - μ = randn(n) - dist = MvNormal(μ, Σ) + @testset "basic" begin + n = 10 + ndraws = 20 + nhist = 4 + A = rand_pd_diag_mat(Float64, 10) + D = rand_pd_mat(Float64, 2nhist) + B = randn(n, 2nhist) + Σ = Pathfinder.WoodburyPDMat(A, B, D) + μ = randn(n) + dist = MvNormal(μ, Σ) - seed = 42 - rng = Random.seed!(Random.default_rng(), seed) - x, logpx = @inferred Pathfinder.rand_and_logpdf(rng, dist, ndraws) - Random.seed!(rng, seed) - x2 = rand(rng, dist, ndraws) - logpx2 = logpdf(dist, x2) - @test x ≈ x2 - @test logpx ≈ logpx2 + seed = 42 + rng = Random.seed!(Random.default_rng(), seed) + x, logpx = @inferred Pathfinder.rand_and_logpdf(rng, dist, ndraws) + Random.seed!(rng, seed) + x2 = rand(rng, dist, ndraws) + logpx2 = logpdf(dist, x2) + @test x ≈ x2 + @test logpx ≈ logpx2 + end + + @testset "consistency of rand" begin + n = 10 + ndraws = 300_000 + nhist = 4 + A = rand_pd_diag_mat(Float64, 10) + D = rand_pd_mat(Float64, 2nhist) + B = randn(n, 2nhist) + + Σ = Pathfinder.WoodburyPDMat(A, B, D) + μ = randn(n) + dist = MvNormal(μ, Σ) + v = diag(Σ) + R = Matrix(Σ) ./ sqrt.(v) ./ sqrt.(v') + + x = rand(dist, ndraws) + μ_est = mean(x; dims=2) + v_est = var(x; mean=μ_est, dims=2) + R_est = cor(x; dims=2) + + nchecks = 2n + div(n * (n - 1), 2) + α = (0.01 / nchecks) / 2 # multiple correction + tol = quantile(Normal(), 1 - α) / sqrt(ndraws) + + # asymptotic standard errors for the marginal estimators + μ_std = sqrt.(v) + v_std = sqrt(2) * v + + for i in 1:n + @test μ_est[i] ≈ μ[i] atol = (tol * μ_std[i]) + @test v_est[i] ≈ v[i] atol = (tol * v_std[i]) + for j in (i + 1):n + # use variance-stabilizing transformation, recommended in §3.6 of + # Van der Vaart, A. W. (2000). Asymptotic statistics (Vol. 3). + @test atanh(R_est[i, j]) ≈ atanh(R[i, j]) atol = tol + end + end + end end @testset "Normal" begin From 29815b015683cf89fa729ff0c3d1eeba9c3ef4c7 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 17:32:47 +0100 Subject: [PATCH 2/9] Add DocStringExtensions as dependency --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index f3ba6d17..b98134c4 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.7.1" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" Folds = "41a02a25-b8f0-4f67-bc48-60067656b558" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" @@ -38,6 +39,7 @@ TuringExt = ["DynamicPPL", "MCMCChains", "Turing"] [compat] Accessors = "0.1" Distributions = "0.25" +DocStringExtensions = "0.9" DynamicPPL = "0.20, 0.21" Folds = "0.2" ForwardDiff = "0.10" From f0ca86f1619c2325339c8293c0718f55af354a94 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 17:34:16 +0100 Subject: [PATCH 3/9] Move OptimizationTrace to its own file --- src/Pathfinder.jl | 1 + src/optimize.jl | 13 ------------- src/trace.jl | 24 ++++++++++++++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 src/trace.jl diff --git a/src/Pathfinder.jl b/src/Pathfinder.jl index 912558eb..9503cfb1 100644 --- a/src/Pathfinder.jl +++ b/src/Pathfinder.jl @@ -36,6 +36,7 @@ end include("transducers.jl") include("woodbury.jl") +include("trace.jl") include("optimize.jl") include("inverse_hessian.jl") include("mvnormal.jl") diff --git a/src/optimize.jl b/src/optimize.jl index db1c4f11..4cdf3479 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -72,16 +72,3 @@ function _make_optimization_callback( return ret end end - -struct OptimizationTrace{P,L} - points::P - log_densities::L - gradients::P -end - -Base.length(trace::OptimizationTrace) = length(trace.points) - -function Base.show(io::IO, trace::OptimizationTrace) - print(io, "OptimizationTrace with $(length(trace) - 1) iterations") - return nothing -end diff --git a/src/trace.jl b/src/trace.jl new file mode 100644 index 00000000..1b58aace --- /dev/null +++ b/src/trace.jl @@ -0,0 +1,24 @@ +""" + $(TYPEDEF) + +A container for the trajectory of points and values computed during optimization. + +# Fields + +$(FIELDS) +""" +struct OptimizationTrace{P,L} + "Points in the optimization trajectory" + points::P + "Log-density (negative objective function) values at `points`" + log_densities::L + "Gradient of the log-density values at `points`" + gradients::P +end + +Base.length(trace::OptimizationTrace) = length(trace.points) + +function Base.show(io::IO, trace::OptimizationTrace) + print(io, "OptimizationTrace with $(length(trace) - 1) iterations") + return nothing +end From 8216f07bd13d9698f93d621e561d3986886b6e72 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 17:36:07 +0100 Subject: [PATCH 4/9] Add callbacks to avoid anonymous functions --- src/Pathfinder.jl | 2 + src/callbacks.jl | 161 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 src/callbacks.jl diff --git a/src/Pathfinder.jl b/src/Pathfinder.jl index 9503cfb1..bc78faaa 100644 --- a/src/Pathfinder.jl +++ b/src/Pathfinder.jl @@ -1,6 +1,7 @@ module Pathfinder using Distributions: Distributions +using DocStringExtensions using Folds: Folds # ensure that ForwardDiff is conditionally loaded by Optimization using ForwardDiff: ForwardDiff @@ -37,6 +38,7 @@ end include("transducers.jl") include("woodbury.jl") include("trace.jl") +include("callbacks.jl") include("optimize.jl") include("inverse_hessian.jl") include("mvnormal.jl") diff --git a/src/callbacks.jl b/src/callbacks.jl new file mode 100644 index 00000000..946f2ad6 --- /dev/null +++ b/src/callbacks.jl @@ -0,0 +1,161 @@ +# utility macro to annotate a field as const only if supported +@eval macro $(Symbol("const"))(x) + if VERSION ≥ v"1.8" + Expr(:const, esc(x)) + else + esc(x) + end +end + +# callbacks for Optimization.jl + +""" +$(TYPEDEF) + +Abstract type for Optimization.jl callbacks. + +A callback should be a callable with the signature + + (x, f(x), args...) -> Bool + +where `x` is the parameter being optimized, and `f` is the objective function. A return +value of `true` signals that optimization should stop. + +See the [Optimization.jl docs](https://docs.sciml.ai/Optimization/stable/API/solve/) for +more information. +""" +abstract type AbstractOptimizationCallback end + +""" +$(TYPEDEF) + +A sequence of Optimization.jl callbacks to be executed in order. +""" +struct CallbackSequence{C<:Tuple} <: AbstractOptimizationCallback + "Tuple of Optimization.jl callbacks to be called in order" + callbacks::C +end + +""" +$(SIGNATURES) + +Wrap the sequence `callbacks`. +""" +CallbackSequence(callbacks...) = CallbackSequence(callbacks) + +function (callback::CallbackSequence)(args...) + return mapfoldl(|, callback.callbacks; init=false) do cb + cb === nothing && return false + return cb(args...) + end +end + +""" +$(TYPEDEF) + +A callback to log progress with a `reporter` + +# Fields + +$(FIELDS) +""" +Base.@kwdef mutable struct ProgressCallback{R} <: AbstractOptimizationCallback + "Reporter function, called with signature `report(progress_id, maxiters, try_id, iter_id)`" + @const reporter::R + "An identifier for the progress bar." + @const progress_id::Base.UUID + "Maximum number of iterations" + @const maxiters::Int + "Index of the current try" + try_id::Int + "Index of the current iteration" + iter_id::Int +end + +function (cb::ProgressCallback)(args...) + cb.iter_id += 1 + return false +end +(::ProgressCallback{Nothing})(args...) = false + +""" +$(SIGNATURES) + +Report progress using ProgressLogging.jl. +""" +function report_progress(progress_id::Base.UUID, maxiters::Int, try_id::Int, iter_id::Int) + Base.@logmsg ProgressLogging.ProgressLevel "Optimizing (try $(try_id))" progress = + iter_id / maxiters _id = progress_id + return nothing +end + +""" +$(TYPEDEF) + +A callback that signals termination if the objective value is non-finite and `fail=true`. +""" +struct CheckFiniteValueCallback <: AbstractOptimizationCallback + "Whether to raise an error if the objective function is non-finite" + fail::Bool +end + +function (cb::CheckFiniteValueCallback)(x, fx, ∇fx, args...) + return cb.fail && isnan(fx) || fx == -Inf || any(!isfinite, ∇fx) +end + +struct FillTraceCallback{G,T<:OptimizationTrace} <: AbstractOptimizationCallback + "A function to compute the gradient of the objective function" + grad::G + "An `Optimization` with empty vectors to be filled." + trace::T +end + +function (cb::FillTraceCallback)(x, fx, args...) + # NOTE: Optimization doesn't have an interface for accessing the gradient trace, + # so we need to recompute it ourselves + # see https://github.com/SciML/Optimization.jl/issues/149 + ∇fx = cb.grad(x) + rmul!(∇fx, -1) + + trace = cb.trace + # some backends mutate x, so we must copy it + push!(trace.points, copy(x)) + push!(trace.log_densities, -fx) + push!(trace.gradients, ∇fx) + return false +end + +# callbacks for Optim.jl + +""" +$(TYPEDEF) + +Abstract type for Optim.jl callbacks. + +A callback should be a callable with the signature + + (states::Vector{<:AbstractOptimizerState}) -> Bool + +where `x` is the parameter being optimized, and `f` is the objective function. A return +value of `true` signals that optimization should stop. +""" +abstract type AbstractOptimJLCallback end + +""" +$(TYPEDEF) + +Adaptor for an Optimization.jl callback to be an Optim.jl callback. +""" +struct OptimJLCallbackAdaptor{C} <: AbstractOptimJLCallback + "An Optimization.jl callback to be called." + callback::C +end + +function (cb::OptimJLCallbackAdaptor)(states) + state = states[end] + md = state.metadata + x = md["x"] + fx = state.value + return cb.callback(x, fx, md["g(x)"]) +end +(::OptimJLCallbackAdaptor{Nothing})(states) = false From 2dea0d42e2cb465807275cea2451115c31b1396d Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 22:14:01 +0100 Subject: [PATCH 5/9] Test callback functions --- src/callbacks.jl | 88 ++++++++++++++++++-------------- test/callbacks.jl | 126 ++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 178 insertions(+), 37 deletions(-) create mode 100644 test/callbacks.jl diff --git a/src/callbacks.jl b/src/callbacks.jl index 946f2ad6..0c80e7a7 100644 --- a/src/callbacks.jl +++ b/src/callbacks.jl @@ -53,6 +53,55 @@ end """ $(TYPEDEF) +A callback that signals termination if the objective value is non-finite and `fail=true`. +""" +struct CheckFiniteValueCallback <: AbstractOptimizationCallback + "Whether to raise an error if the objective function is non-finite" + fail::Bool +end + +function (cb::CheckFiniteValueCallback)(x, fx, args...) + return cb.fail && (isnan(fx) || fx == -Inf) +end + +""" +$(TYPEDEF) + +A callback to fill an [`OptimizationTrace`](@ref) + +!!! note + Optimization doesn't have an interface for accessing the gradient trace, so this + callback recomputes the gradient. + +# Fields + +$(FIELDS) +""" +struct FillTraceCallback{G,T<:OptimizationTrace} <: AbstractOptimizationCallback + "A function to compute the gradient of the objective function" + grad::G + "An `OptimizationTrace` with empty vectors to be filled." + trace::T +end + +function (cb::FillTraceCallback)(x, fx, args...) + # NOTE: Optimization doesn't have an interface for accessing the gradient trace, + # so we need to recompute it ourselves + # see https://github.com/SciML/Optimization.jl/issues/149 + ∇fx = cb.grad(x) + rmul!(∇fx, -1) + + trace = cb.trace + # some backends mutate x, so we must copy it + push!(trace.points, copy(x)) + push!(trace.log_densities, -fx) + push!(trace.gradients, ∇fx) + return false +end + +""" +$(TYPEDEF) + A callback to log progress with a `reporter` # Fields @@ -73,10 +122,11 @@ Base.@kwdef mutable struct ProgressCallback{R} <: AbstractOptimizationCallback end function (cb::ProgressCallback)(args...) + reporter = cb.reporter + reporter === nothing || reporter(cb.progress_id, cb.maxiters, cb.try_id, cb.iter_id) cb.iter_id += 1 return false end -(::ProgressCallback{Nothing})(args...) = false """ $(SIGNATURES) @@ -89,42 +139,6 @@ function report_progress(progress_id::Base.UUID, maxiters::Int, try_id::Int, ite return nothing end -""" -$(TYPEDEF) - -A callback that signals termination if the objective value is non-finite and `fail=true`. -""" -struct CheckFiniteValueCallback <: AbstractOptimizationCallback - "Whether to raise an error if the objective function is non-finite" - fail::Bool -end - -function (cb::CheckFiniteValueCallback)(x, fx, ∇fx, args...) - return cb.fail && isnan(fx) || fx == -Inf || any(!isfinite, ∇fx) -end - -struct FillTraceCallback{G,T<:OptimizationTrace} <: AbstractOptimizationCallback - "A function to compute the gradient of the objective function" - grad::G - "An `Optimization` with empty vectors to be filled." - trace::T -end - -function (cb::FillTraceCallback)(x, fx, args...) - # NOTE: Optimization doesn't have an interface for accessing the gradient trace, - # so we need to recompute it ourselves - # see https://github.com/SciML/Optimization.jl/issues/149 - ∇fx = cb.grad(x) - rmul!(∇fx, -1) - - trace = cb.trace - # some backends mutate x, so we must copy it - push!(trace.points, copy(x)) - push!(trace.log_densities, -fx) - push!(trace.gradients, ∇fx) - return false -end - # callbacks for Optim.jl """ diff --git a/test/callbacks.jl b/test/callbacks.jl new file mode 100644 index 00000000..26134c75 --- /dev/null +++ b/test/callbacks.jl @@ -0,0 +1,126 @@ +using ForwardDiff, Optim, Pathfinder, Test, UUIDs + +include("test_utils.jl") + +struct DummyCallback{C,D} + cond1::C + cond2::D +end + +function (cb::DummyCallback)(x, fx, args...) + return cb.cond1(x) || cb.cond2(fx) +end + +@testset "callbacks" begin + @testset "CallbackSequence" begin + cb = Pathfinder.CallbackSequence( + DummyCallback(iszero, iszero), DummyCallback(isnan, Base.Fix1(any, isnan)) + ) + @inferred cb(1.0, [2.0, 3.0], nothing) + @test !cb(1.0, [2.0, 3.0], nothing) + @test cb(0.0, [2.0, 3.0], nothing) + @test cb(1.0, [0.0, 0.0]) + @test cb(NaN, [1.0, 2.0]) + @test cb(1.0, [NaN, 2.0]) + end + + @testset "CheckFiniteValueCallback" begin + x = randn(3) + cb = Pathfinder.CheckFiniteValueCallback(true) + @inferred cb(x, 1.0, nothing) + @test !cb(x, 1.0) + @test !cb(x, 2.0) + @test cb(x, NaN) + @test cb(x, -Inf) + + cb2 = Pathfinder.CheckFiniteValueCallback(false) + @inferred cb2(x, 1.0, nothing) + @test !cb2(x, 1.0) + @test !cb2(x, 2.0) + @test !cb2(x, NaN) + @test !cb2(x, -Inf) + end + + @testset "FillTraceCallback" begin + trace = Pathfinder.OptimizationTrace( + Vector{Float64}[], Float64[], Vector{Float64}[] + ) + grad(x) = x / 4 + cb = Pathfinder.FillTraceCallback(grad, trace) + xs = [randn(10) for _ in 1:3] + fxs = rand(3) + for i in eachindex(xs, fxs) + @test !cb(xs[i], fxs[i], nothing) + @test length(trace.points) == + length(trace.log_densities) == + length(trace.gradients) == + i + @test trace.points[i] == xs[i] + @test trace.log_densities[i] == -fxs[i] + @test trace.gradients[i] == -grad(xs[i]) + end + end + + @testset "ProgressCallback" begin + @testset for maxiters in [10, 100], try_id in 1:2 + progress_id = UUIDs.uuid1() + progress_trace = [] + reporter = function (progress_id, maxiters, try_id, iter_id) + push!(progress_trace, (progress_id, maxiters, try_id, iter_id)) + return nothing + end + cb = Pathfinder.ProgressCallback(; + reporter, progress_id, maxiters, try_id, iter_id=0 + ) + @testset for i in 1:3 + @test !cb(randn(10), rand(), nothing) + @test length(progress_trace) == i + @test progress_trace[i] == (progress_id, maxiters, try_id, i - 1) + end + cb2 = Pathfinder.ProgressCallback(; + reporter=nothing, progress_id, maxiters, try_id, iter_id=0 + ) + @test !cb2(randn(10), rand(), nothing) + end + end + + @testset "report_progress" begin + @testset for maxiters in (10, 100), try_id in (1, 2) + logs, = Test.collect_test_logs(; min_level=ProgressLogging.ProgressLevel) do + ProgressLogging.progress(; name="Optimizing") do progress_id + for iter_id in 0:maxiters + Pathfinder.report_progress(progress_id, maxiters, try_id, iter_id) + end + end + end + @test length(logs) == maxiters + 3 + @test logs[1].kwargs[:progress] === nothing + @test logs[1].message == "Optimizing" + for i in 0:maxiters + @test logs[i + 2].kwargs[:progress] == i / maxiters + @test logs[i + 2].message == "Optimizing (try $try_id)" + end + @test logs[maxiters + 3].kwargs[:progress] == "done" + end + end + + @testset "OptimJLCallback" begin + f(x) = -logp_banana(x) + grad(x) = ForwardDiff.gradient(f, x) + trace = Pathfinder.OptimizationTrace( + Vector{Float64}[], Float64[], Vector{Float64}[] + ) + callback = Pathfinder.OptimJLCallbackAdaptor( + Pathfinder.FillTraceCallback(grad, trace) + ) + x0 = randn(2) + options = Optim.Options(; callback, store_trace=true, extended_trace=true) + sol = Optim.optimize(f, x0, LBFGS(), options) + @test trace.points == Optim.x_trace(sol) + @test trace.log_densities == -Optim.f_trace(sol) + @test trace.gradients ≈ -[t.metadata["g(x)"] for t in sol.trace] + + cb2 = Pathfinder.OptimJLCallbackAdaptor(nothing) + @test !cb2([]) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 9bd8462a..d05adaa5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,6 +7,7 @@ Random.seed!(0) @testset "Pathfinder.jl" begin include("transducers.jl") include("woodbury.jl") + include("callbacks.jl") include("optimize.jl") include("inverse_hessian.jl") include("mvnormal.jl") From 094b4fde0fe5daaf7b7f24eddf9cdca56df58443 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 22:43:49 +0100 Subject: [PATCH 6/9] Allow non-UUID progress_ids --- src/callbacks.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/callbacks.jl b/src/callbacks.jl index 0c80e7a7..c3e67813 100644 --- a/src/callbacks.jl +++ b/src/callbacks.jl @@ -108,11 +108,11 @@ A callback to log progress with a `reporter` $(FIELDS) """ -Base.@kwdef mutable struct ProgressCallback{R} <: AbstractOptimizationCallback +Base.@kwdef mutable struct ProgressCallback{R,U} <: AbstractOptimizationCallback "Reporter function, called with signature `report(progress_id, maxiters, try_id, iter_id)`" @const reporter::R "An identifier for the progress bar." - @const progress_id::Base.UUID + @const progress_id::U "Maximum number of iterations" @const maxiters::Int "Index of the current try" @@ -133,7 +133,7 @@ $(SIGNATURES) Report progress using ProgressLogging.jl. """ -function report_progress(progress_id::Base.UUID, maxiters::Int, try_id::Int, iter_id::Int) +function report_progress(progress_id, maxiters::Int, try_id::Int, iter_id::Int) Base.@logmsg ProgressLogging.ProgressLevel "Optimizing (try $(try_id))" progress = iter_id / maxiters _id = progress_id return nothing From 37d6c3298fec8b0b235a3542ef4a65830b29b513 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 22:44:11 +0100 Subject: [PATCH 7/9] Update optimization code --- src/optimize.jl | 197 +++++++++++++++++++++++++++++++++++++++-------- test/optimize.jl | 132 ++++++++++++++----------------- 2 files changed, 224 insertions(+), 105 deletions(-) diff --git a/src/optimize.jl b/src/optimize.jl index 4cdf3479..23d0c159 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,4 +1,95 @@ -function build_optim_function(ℓ) +""" +$(TYPEDEF) + +A callable that wraps a LogDensityProblem to be an `fgh!` callable for Optim.jl. + +See the [Optim.jl docs](https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations) +for details. + +# Fields + +$(TYPEDFIELDS) +""" +struct OptimJLFunction{P} + "An object that implements the LogDensityProblem interface" + prob::P +end + +# avoid repeated computation by computing the highest order derivative required and not +# recomputing the lower order ones +function (fun::OptimJLFunction)(F, G, H, x) + prob = fun.prob + if H !== nothing + lp, glp, Hlp = LogDensityProblems.logdensity_gradient_and_hessian(prob, x) + @. H = -Hlp + if G !== nothing + @. G = -glp + end + F === nothing || return -lp + elseif G !== nothing + lp, glp = LogDensityProblems.logdensity_and_gradient(prob, x) + @. G = -glp + F === nothing || return -lp + elseif F !== nothing + return -LogDensityProblems.logdensity(prob, x) + end + return nothing +end + +""" +$(TYPEDEF) + +A utility object to mimic a `SciMLBase.OptimizationProblem` for use with Optim.jl. + +# Fields + +$(TYPEDFIELDS) +""" +struct OptimJLProblem{F<:OptimJLFunction,U<:AbstractVector{<:Real}} + "An optimization function." + fun::F + "Initial point" + u0::U +end + +function _defines_gradient(fun::SciMLBase.OptimizationFunction) + return fun.grad !== nothing && !(fun.grad isa Bool) +end +_defines_gradient(prob::SciMLBase.OptimizationProblem) = _defines_gradient(prob.f) +_defines_gradient(::Any) = true + +""" +$(SIGNATURES) + +Construct a log-density function with signature `x -> Real` from an optimization function. +""" +get_logp(fun::SciMLBase.OptimizationFunction) = Base.Fix2((-) ∘ fun.f, nothing) +get_logp(fun::OptimJLFunction) = Base.Fix1(LogDensityProblems.logdensity, fun.prob) + +""" +$(SIGNATURES) + +Construct a log-density function with signature `x -> Real` from an optimization problem. +""" +get_logp(prob::SciMLBase.OptimizationProblem) = get_logp(prob.f) +get_logp(prob::OptimJLProblem) = get_logp(prob.fun) + +""" +$(SIGNATURES) + +Build an optimization function from the LogDensityProblem `ℓ`. + +The type of the returned object is determined by `optimizer`, either an +[`OptimJLFunction`](@ref) or a `SciMLBase.OptimizationFunction`. +""" +build_optim_function(ℓ, optimizer) = _build_sciml_optim_function(ℓ) +function build_optim_function( + ℓ, ::Union{Optim.FirstOrderOptimizer,Optim.SecondOrderOptimizer} +) + return OptimJLFunction(ℓ) +end + +function _build_sciml_optim_function(ℓ) f(x, p) = -LogDensityProblems.logdensity(ℓ, x) function grad(res, x, p) _, ∇fx = LogDensityProblems.logdensity_and_gradient(ℓ, x) @@ -13,14 +104,36 @@ function build_optim_function(ℓ) return SciMLBase.OptimizationFunction{true}(f; grad, hess) end +""" +$(SIGNATURES) + +Build an optimization problem from the LogDensityProblem `ℓ` and initial point `x₀`. + +The type of the returned object is determined by `optimizer`, either an +[`OptimJLProbkem`](@ref) or a `SciMLBase.OptimizationProblem`. +""" function build_optim_problem(optim_fun, x₀) return SciMLBase.OptimizationProblem(optim_fun, x₀, nothing) end +function build_optim_problem(optim_fun::OptimJLFunction, x₀) + return OptimJLProblem(optim_fun, x₀) +end + +""" +$(SIGNATURES) +# Returns + +- `sol`: The optimization solution, either a `SciMLBase.OptimizationSolution` or a + `Optim.MultivariateOptimizationResults` +- `trace::OptimizationTrace`: the optimization trace, where the first point is the initial + one. +""" function optimize_with_trace( - prob, + prob::SciMLBase.OptimizationProblem, optimizer; - progress_name="Optimizing", + reporter=report_progress, + try_id=1, progress_id=nothing, maxiters=1_000, callback=nothing, @@ -29,46 +142,66 @@ function optimize_with_trace( ) u0 = prob.u0 fun = prob.f - function ∇f(x) + + function grad(x) SciMLBase.isinplace(fun) || return fun.grad(x, nothing) res = similar(x) fun.grad(res, x, nothing) - rmul!(res, -1) return res end - # caches for the trace of x and f(x) + + # allocate containers for the trace of x and f(x) xs = typeof(u0)[] fxs = typeof(fun.f(u0, nothing))[] ∇fxs = typeof(u0)[] - _callback = _make_optimization_callback( - xs, fxs, ∇fxs, ∇f; progress_name, progress_id, maxiters, callback, fail_on_nonfinite + trace = OptimizationTrace(xs, fxs, ∇fxs) + + _callback = CallbackSequence( + callback, + ProgressCallback(; reporter, progress_id, try_id, maxiters, iter_id=0), + CheckFiniteValueCallback(fail_on_nonfinite), + FillTraceCallback(grad, trace), ) sol = Optimization.solve(prob, optimizer; callback=_callback, maxiters, kwargs...) - return sol, OptimizationTrace(xs, fxs, ∇fxs) + return sol, trace end - -function _make_optimization_callback( - xs, fxs, ∇fxs, ∇f; progress_name, progress_id, maxiters, callback, fail_on_nonfinite +function optimize_with_trace( + prob::OptimJLProblem, + optimizer::Union{Optim.FirstOrderOptimizer,Optim.SecondOrderOptimizer}; + reporter=report_progress, + try_id=1, + progress_id=nothing, + maxiters=1_000, + callback=nothing, + fail_on_nonfinite=true, + kwargs..., ) - return function (x, nfx, args...) - ret = callback !== nothing && callback(x, nfx, args...) - iteration = length(xs) - Base.@logmsg ProgressLogging.ProgressLevel progress_name progress = - iteration / maxiters _id = progress_id - - # some backends mutate x, so we must copy it - push!(xs, copy(x)) - push!(fxs, -nfx) - # NOTE: Optimization doesn't have an interface for accessing the gradient trace, - # so we need to recompute it ourselves - # see https://github.com/SciML/Optimization.jl/issues/149 - ∇fx = ∇f(x) - push!(∇fxs, ∇fx) - - if fail_on_nonfinite && !ret - ret = (isnan(nfx) || nfx == -Inf || any(!isfinite, ∇fx))::Bool - end + _callback = OptimJLCallbackAdaptor( + CallbackSequence( + callback, + ProgressCallback(; reporter, progress_id, try_id, maxiters, iter_id=0), + CheckFiniteValueCallback(fail_on_nonfinite), + ), + ) + options = Optim.Options(; + callback=_callback, + store_trace=true, + extended_trace=true, + iterations=maxiters, + kwargs..., + ) + result = Optim.optimize(Optim.only_fgh!(prob.fun), prob.u0, optimizer, options) - return ret - end + u0 = prob.u0 + xtrace = Optim.x_trace(result) + iterations = min(length(xtrace) - 1, Optim.iterations(result)) + + # containers for the trace of x and ∇f(x) + xs = Vector{typeof(u0)}(undef, iterations + 1) + ∇fxs = Vector{typeof(u0)}(undef, iterations + 1) + + copyto!(xs, xtrace) + fxs = -Optim.f_trace(result) + map!(tr -> -tr.metadata["g(x)"], ∇fxs, Optim.trace(result)) + return result, OptimizationTrace(xs, fxs, ∇fxs) end diff --git a/test/optimize.jl b/test/optimize.jl index e4785677..d7cc78b9 100644 --- a/test/optimize.jl +++ b/test/optimize.jl @@ -1,4 +1,5 @@ using LinearAlgebra +using LogDensityProblems using Optim using OptimizationNLopt using Pathfinder @@ -12,68 +13,48 @@ include("test_utils.jl") n = 20 ℓ = build_logdensityproblem(logp_banana, n) x = randn(n) - fun = Pathfinder.build_optim_function(ℓ) - @test fun isa SciMLBase.OptimizationFunction - @test SciMLBase.isinplace(fun) - @test fun.f(x, nothing) ≈ -ℓ.logp(x) - ∇fx = similar(x) - fun.grad(∇fx, x, nothing) - @test ∇fx ≈ -ℓ.∇logp(x) - H = similar(x, n, n) - fun.hess(H, x, nothing) - @test H ≈ -ℓ.∇²logp(x) + + @testset for optimizer in (Optim.LBFGS(), Optim.Newton()) + fun = @inferred Pathfinder.build_optim_function(ℓ, optimizer) + @test fun isa Pathfinder.OptimJLFunction + @test fun.prob === ℓ + end + + @testset for optimizer in (NLopt.Opt(:LD_LBFGS, n),) + fun = @inferred Pathfinder.build_optim_function(ℓ, optimizer) + @test fun isa SciMLBase.OptimizationFunction + @test SciMLBase.isinplace(fun) + @test fun.f(x, nothing) ≈ -ℓ.logp(x) + ∇fx = similar(x) + fun.grad(∇fx, x, nothing) + @test ∇fx ≈ -ℓ.∇logp(x) + H = similar(x, n, n) + fun.hess(H, x, nothing) + @test H ≈ -ℓ.∇²logp(x) + end end @testset "build_optim_problem" begin n = 20 ℓ = build_logdensityproblem(logp_banana, n) x0 = randn(n) - fun = Pathfinder.build_optim_function(ℓ) - prob = Pathfinder.build_optim_problem(fun, x0) - @test SciMLBase.isinplace(prob) - @test prob isa SciMLBase.OptimizationProblem - @test prob.f === fun - @test prob.u0 == x0 - @test prob.p === nothing -end -@testset "_make_optimization_callback" begin - @testset "callback return value" begin - progress_name = "Optimizing" - progress_id = nothing - maxiters = 1_000 - x = randn(5) - check_vals = [0.0, NaN, -Inf, Inf] - @testset for fail_on_nonfinite in [true, false], - fval in check_vals, - gval in check_vals, - cbfail in [true, false] + @testset "OptimJLFunction" begin + fun = Pathfinder.OptimJLFunction(ℓ) + prob = @inferred Pathfinder.build_optim_problem(fun, x0) + @test prob isa Pathfinder.OptimJLProblem + @test prob.fun === fun + @test prob.u0 === x0 + end - xs = Vector{Float64}[] - fxs = Float64[] - ∇fxs = Vector{Float64}[] - ∇f = function (x) - g = -x - g[end] = gval - return g - end - callback = (x, fx, args...) -> cbfail - cb = Pathfinder._make_optimization_callback( - xs, - fxs, - ∇fxs, - ∇f; - progress_name, - progress_id, - maxiters, - callback, - fail_on_nonfinite, - ) - should_fail = - cbfail || - (fail_on_nonfinite && (isnan(fval) || fval == Inf || !isfinite(gval))) - @test cb(x, -fval) == should_fail - end + @testset "SciML.OptimizationFunction" begin + fun = Pathfinder._build_sciml_optim_function(ℓ) + prob = Pathfinder.build_optim_problem(fun, x0) + @test SciMLBase.isinplace(prob) + @test prob isa SciMLBase.OptimizationProblem + @test prob.f === fun + @test prob.u0 == x0 + @test prob.p === nothing end end @@ -85,8 +66,6 @@ end ℓ = build_logdensityproblem(f, n) x0 = randn(n) - fun = Pathfinder.build_optim_function(ℓ) - prob = Pathfinder.build_optim_problem(fun, x0) optimizers = [ "Optim.BFGS" => Optim.BFGS(), @@ -95,37 +74,44 @@ end "NLopt.Opt" => NLopt.Opt(:LD_LBFGS, n), ] @testset "$name" for (name, optimizer) in optimizers + fun = Pathfinder.build_optim_function(ℓ, optimizer) + prob = Pathfinder.build_optim_problem(fun, x0) optim_sol, optim_trace = Pathfinder.optimize_with_trace(prob, optimizer) - @test optim_sol isa SciMLBase.OptimizationSolution @test optim_trace isa Pathfinder.OptimizationTrace @test optim_trace.points[1] ≈ x0 @test optim_trace.points[end] ≈ μ - @test optim_sol.u ≈ μ @test optim_trace.log_densities ≈ f.(optim_trace.points) @test optim_trace.gradients ≈ ℓ.∇logp.(optim_trace.points) atol = 1e-4 - if !(optimizer isa NLopt.Opt) + if optimizer isa Union{Optim.FirstOrderOptimizer,Optim.SecondOrderOptimizer} + @test optim_sol isa Optim.MultivariateOptimizationResults + options = Optim.Options(; store_trace=true, extended_trace=true) res = Optim.optimize( x -> -f(x), (y, x) -> copyto!(y, -ℓ.∇logp(x)), x0, optimizer, options ) - @test Optim.iterations(res) == length(optim_trace.points) - 1 - @test Optim.x_trace(res) ≈ optim_trace.points - @test Optim.minimizer(res) ≈ optim_trace.points[end] + @test Optim.iterations(res) == Optim.iterations(optim_sol) + @test Optim.x_trace(res) ≈ Optim.x_trace(optim_sol) + @test Optim.minimizer(res) ≈ Optim.minimizer(optim_sol) + else + @test optim_sol isa SciMLBase.OptimizationSolution + @test optim_sol.u ≈ μ end - end - @testset "progress logging" begin - logs, = Test.collect_test_logs(; min_level=ProgressLogging.ProgressLevel) do - ProgressLogging.progress(; name="Optimizing") do progress_id - Pathfinder.optimize_with_trace(prob, Optim.LBFGS(); progress_id) + @testset "progress logging" begin + @testset for try_id in 1:2 + logs, = Test.collect_test_logs(; min_level=ProgressLogging.ProgressLevel) do + ProgressLogging.progress(; name="Optimizing") do progress_id + Pathfinder.optimize_with_trace(prob, Optim.LBFGS(); progress_id, try_id) + end + end + @test logs[1].kwargs[:progress] === nothing + @test logs[1].message == "Optimizing" + @test logs[2].kwargs[:progress] == 0.0 + @test logs[2].message == "Optimizing (try $try_id)" + @test logs[3].kwargs[:progress] == 0.001 + @test logs[end].kwargs[:progress] == "done" end end - @test logs[1].kwargs[:progress] === nothing - @test logs[1].message == "Optimizing" - @test logs[2].kwargs[:progress] == 0.0 - @test logs[2].message == "Optimizing" - @test logs[3].kwargs[:progress] == 0.001 - @test logs[end].kwargs[:progress] == "done" end end From 9e34640641e13fb820cff515162bcdafab0c8566 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 22:47:05 +0100 Subject: [PATCH 8/9] Use Optim.jl directly for Optim solvers --- src/singlepath.jl | 61 +++++++++++++++++++++++----------------------- test/singlepath.jl | 12 ++++++--- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/singlepath.jl b/src/singlepath.jl index 4603a2c3..4096c156 100644 --- a/src/singlepath.jl +++ b/src/singlepath.jl @@ -21,7 +21,8 @@ Container for results of single-path Pathfinder. - `draws_transformed`: `draws` transformed to be draws from `fit_distribution_transformed`. - `fit_iteration::Int`: Iteration at which ELBO estimate was maximized - `num_tries::Int`: Number of tries until Pathfinder succeeded -- `optim_solution::SciMLBase.OptimizationSolution`: Solution object of optimization. +- `optim_solution::Union{Optim.MultivariateOptimizationResults,SciMLBase.OptimizationSolution}`: + Solution object of optimization. - `optim_trace::Pathfinder.OptimizationTrace`: container for optimization trace of points, log-density, and gradient. The first point is the initial point. - `fit_distributions::AbstractVector{Distributions.MvNormal}`: Multivariate normal @@ -92,7 +93,8 @@ constructed using at most the previous `history_length` steps. [Optimization.jl: OptimizationFunction](https://optimization.sciml.ai/stable/API/optimization_function/). - `prob::SciMLBase.OptimizationProblem`: an optimization problem containing a function with the same properties as `fun`, as well as an initial point, in which case `init` and - `dim` are ignored. + `dim` are ignored. If `ntries > 1`, then the initial point `prob.u0` must also be + mutable and may be overwritten. # Keywords - `dim::Int`: dimension of the target distribution, needed only if `fun` is provided and @@ -132,14 +134,20 @@ constructed using at most the previous `history_length` steps. """ function pathfinder end -function pathfinder(ℓ; input=ℓ, kwargs...) +function pathfinder( + ℓ; + input=ℓ, + history_length::Int=DEFAULT_HISTORY_LENGTH, + optimizer=default_optimizer(history_length), + kwargs..., +) _check_log_density_problem(ℓ) dim = LogDensityProblems.dimension(ℓ) - optim_fun = build_optim_function(ℓ) - return pathfinder(optim_fun; input, dim, kwargs...) + optim_fun = build_optim_function(ℓ, optimizer) + return pathfinder(optim_fun; input, dim, history_length, optimizer, kwargs...) end function pathfinder( - optim_fun::SciMLBase.OptimizationFunction; + optim_fun::Union{SciMLBase.OptimizationFunction,OptimJLFunction}; rng=Random.GLOBAL_RNG, init=nothing, dim::Int=-1, @@ -149,20 +157,19 @@ function pathfinder( kwargs..., ) if init !== nothing - _init = init - allow_mutating_init = false + _init = similar(init) + copyto!(_init, init) elseif init === nothing && dim > 0 _init = Vector{Float64}(undef, dim) init_sampler(rng, _init) - allow_mutating_init = true else throw(ArgumentError("An initial point `init` or dimension `dim` must be provided.")) end prob = build_optim_problem(optim_fun, _init) - return pathfinder(prob; rng, input, init_sampler, allow_mutating_init, kwargs...) + return pathfinder(prob; rng, input, init_sampler, kwargs...) end function pathfinder( - prob::SciMLBase.OptimizationProblem; + prob::Union{<:SciMLBase.OptimizationProblem,<:OptimJLProblem}; rng::Random.AbstractRNG=Random.GLOBAL_RNG, history_length::Int=DEFAULT_HISTORY_LENGTH, optimizer=default_optimizer(history_length), @@ -171,10 +178,9 @@ function pathfinder( input=prob, kwargs..., ) - if prob.f.grad === nothing || prob.f.grad isa Bool + _defines_gradient(prob) || throw(ArgumentError("optimization function must define a gradient function.")) - end - logp(x) = -prob.f.f(x, nothing) + logp = get_logp(prob) path_result = ProgressLogging.progress(; name="Optimizing") do progress_id return _pathfinder_try_until_succeed( rng, @@ -188,7 +194,7 @@ function pathfinder( ) end @unpack ( - itry, + try_id, success, optim_prob, optim_solution, @@ -201,7 +207,7 @@ function pathfinder( if !success ndraws_elbo_actual = 0 - @warn "Pathfinder failed after $itry tries. Increase `ntries`, inspect the model for numerical instability, or provide a more suitable `init_sampler`." + @warn "Pathfinder failed after $try_id tries. Increase `ntries`, inspect the model for numerical instability, or provide a more suitable `init_sampler`." else elbo_estimate_opt = elbo_estimates[fit_iteration] ndraws_elbo_actual = ndraws_elbo @@ -238,7 +244,7 @@ function pathfinder( fit_distribution_transformed, draws_transformed, fit_iteration, - itry, + try_id, optim_solution, optim_trace, fit_distributions, @@ -254,23 +260,16 @@ function _pathfinder_try_until_succeed( ntries::Int=1_000, init_scale=2, init_sampler=UniformSampler(init_scale), - allow_mutating_init::Bool=false, kwargs..., ) - itry = 1 - progress_name = "Optimizing (try 1)" - result = _pathfinder(rng, prob, logp; progress_name, kwargs...) - _prob = prob - while !result.success && itry < ntries - if itry == 1 && !allow_mutating_init - _prob = deepcopy(prob) - end - itry += 1 - init_sampler(rng, _prob.u0) - progress_name = "Optimizing (try $itry)" - result = _pathfinder(rng, _prob, logp; progress_name, kwargs...) + try_id = 1 + result = _pathfinder(rng, prob, logp; try_id, kwargs...) + while !result.success && try_id < ntries + try_id += 1 + init_sampler(rng, prob.u0) + result = _pathfinder(rng, prob, logp; try_id, kwargs...) end - return (; itry, optim_prob=_prob, result...) + return (; try_id, optim_prob=prob, result...) end function _pathfinder( diff --git a/test/singlepath.jl b/test/singlepath.jl index f70a062f..e298e8c7 100644 --- a/test/singlepath.jl +++ b/test/singlepath.jl @@ -27,10 +27,13 @@ include("test_utils.jl") ℓ = build_logdensityproblem(logp, 5) init = randn(dim) Random.seed!(rng, seed) - result = @inferred pathfinder(ℓ; init, ndraws, rng, executor) + result = @inferred Pathfinder.PathfinderResult pathfinder( + ℓ; init, ndraws, rng, executor + ) @test result isa PathfinderResult @test result.input === ℓ - @test result.optim_prob isa SciMLBase.OptimizationProblem + @test result.optim_prob isa Pathfinder.OptimJLProblem + @test result.optim_solution isa Optim.MultivariateOptimizationResults @test result.logp(init) ≈ logp(init) @test result.rng === rng @test result.optimizer === @@ -46,7 +49,6 @@ include("test_utils.jl") @test result.fit_distribution_transformed === result.fit_distribution @test result.draws_transformed === result.draws @test result.num_tries ≥ 1 - @test result.optim_solution isa SciMLBase.OptimizationSolution @test result.optim_trace isa Pathfinder.OptimizationTrace @test result.fit_distributions isa Vector{typeof(fit_distribution)} @test length(result.fit_distributions) == length(result.optim_trace) @@ -93,7 +95,9 @@ include("test_utils.jl") executor = rng isa MersenneTwister ? SequentialEx() : ThreadedEx() Random.seed!(rng, seed) - result = @inferred pathfinder(ℓ; rng, optimizer, ndraws_elbo, executor) + result = @inferred Pathfinder.PathfinderResult pathfinder( + ℓ; rng, optimizer, ndraws_elbo, executor + ) @test result.input === ℓ @test result.fit_distribution.Σ ≈ Σ rtol = 1e-1 @test result.optimizer == optimizer From d7f4bc3b4ef1b8f91366f22e64b0a3830b6cb882 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Mon, 13 Feb 2023 22:47:28 +0100 Subject: [PATCH 9/9] Update multipath accordingly --- src/multipath.jl | 22 +++++++++++++++------- test/multipath.jl | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/multipath.jl b/src/multipath.jl index 150b6905..6479cdd8 100644 --- a/src/multipath.jl +++ b/src/multipath.jl @@ -123,14 +123,23 @@ for approximating expectations with respect to ``p``. """ function multipathfinder end -function multipathfinder(ℓ, ndraws::Int; input=ℓ, kwargs...) +function multipathfinder( + ℓ, + ndraws::Int; + input=ℓ, + history_length::Int=DEFAULT_HISTORY_LENGTH, + optimizer=default_optimizer(history_length), + kwargs..., +) _check_log_density_problem(ℓ) dim = LogDensityProblems.dimension(ℓ) - optim_fun = build_optim_function(ℓ) - return multipathfinder(optim_fun, ndraws; input, dim, kwargs...) + optim_fun = build_optim_function(ℓ, optimizer) + return multipathfinder( + optim_fun, ndraws; input, dim, history_length, optimizer, kwargs... + ) end function multipathfinder( - optim_fun::SciMLBase.OptimizationFunction, + optim_fun::Union{SciMLBase.OptimizationFunction,OptimJLFunction}, ndraws::Int; init=nothing, input=optim_fun, @@ -145,9 +154,8 @@ function multipathfinder( importance::Bool=true, kwargs..., ) - if optim_fun.grad === nothing || optim_fun.grad isa Bool + _defines_gradient(optim_fun) || throw(ArgumentError("optimization function must define a gradient function.")) - end if init === nothing nruns > 0 || throw( ArgumentError("A positive `nruns` must be set or `init` must be provided.") @@ -159,7 +167,7 @@ function multipathfinder( if ndraws > ndraws_per_run * nruns @warn "More draws requested than total number of draws across replicas. Draws will not be unique." end - logp(x) = -optim_fun.f(x, nothing) + logp = get_logp(optim_fun) # run pathfinder independently from each starting point trans = Transducers.Map() do (init_i) diff --git a/test/multipath.jl b/test/multipath.jl index ace61956..ec1ed0ee 100644 --- a/test/multipath.jl +++ b/test/multipath.jl @@ -37,7 +37,7 @@ include("test_utils.jl") ) @test result isa MultiPathfinderResult @test result.input === ℓ - @test result.optim_fun isa SciMLBase.OptimizationFunction + @test result.optim_fun isa Pathfinder.OptimJLFunction @test result.rng === rng @test result.optimizer === Pathfinder.default_optimizer(Pathfinder.DEFAULT_HISTORY_LENGTH)