Basic implementation of Anderson acceleration (#101)
* add a basic implementation of Anderson acceleration and include a short description in README.md
antoine-levitt authored and pkofod committed Jul 1, 2017
1 parent e10aa1d commit fff21cf
Showing 6 changed files with 140 additions and 8 deletions.
20 changes: 19 additions & 1 deletion README.md
@@ -237,7 +237,7 @@ If `g!` conserves the sparsity structure of `gx`, `gx` will always have the sam

# Fine tunings

-Two algorithms are currently available. The choice between the two is achieved
+Three algorithms are currently available. The choice between these is achieved
by setting the optional `method` argument of `nlsolve`. The default algorithm
is the trust region method.

@@ -270,6 +270,23 @@ By default, no linesearch is performed.
**Note:** it is assumed that a passed linesearch function will at least update the solution
vector and evaluate the function at the new point.

## Anderson acceleration

Also known as DIIS or Pulay mixing, this method accelerates the
fixed-point iteration `xn+1 = xn + β f(xn)`, where `β = 1` by default.
It does not use Jacobian information or a linesearch, but keeps a
history of iterates whose size is controlled by the `m` parameter:
`m = 0` (the default) corresponds to the simple fixed-point iteration
above, and higher values use a larger history to accelerate the
iterations. Higher values of `m` usually speed up convergence, but
increase the storage and computation requirements and might lead to
instabilities. This method is useful for accelerating a fixed-point
iteration `xn+1 = g(xn)`, in which case it should be used with
`f(x) = g(x) - x`.

Reference: H. Walker and P. Ni, Anderson acceleration for fixed-point
iterations, SIAM Journal on Numerical Analysis, 2011.
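
As a quick sketch (using a toy fixed-point problem, `g(x) = cos(x)`, which is
not part of this package's examples), one would write `f(x) = g(x) - x` and
call the solver with `method = :anderson`:

```julia
using NLsolve

# a root of f(x) = cos(x) - x is a fixed point of g(x) = cos(x);
# f! follows the in-place f!(x, fx) convention used throughout NLsolve
function f!(x, fx)
    fx[1] = cos(x[1]) - x[1]
end

r = nlsolve(f!, [1.0], method = :anderson, m = 5, beta = 1.0)
```

With `m = 0` this reduces to the plain (damped) fixed-point iteration; larger
`m` turns on the least-squares extrapolation over the stored history.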

## Common options

Other optional arguments to `nlsolve`, available for all algorithms, are:
@@ -367,6 +384,7 @@ julia> fvec
* Broyden updating of Jacobian in trust-region
* Homotopy methods
* [LMMCP algorithm by C. Kanzow](http://www.mathematik.uni-wuerzburg.de/~kanzow/)
* QR updating of the least-squares problem in the Anderson acceleration solver

# Related Packages

1 change: 1 addition & 0 deletions src/NLsolve.jl
@@ -39,6 +39,7 @@ include("mcp_func_defs.jl")
include("utils.jl")
include("newton.jl")
include("trust_region.jl")
include("anderson.jl")
include("autodiff.jl")
include("mcp.jl")

96 changes: 96 additions & 0 deletions src/anderson.jl
@@ -0,0 +1,96 @@
# Notations from Walker & Ni, "Anderson acceleration for fixed-point iterations", SINUM 2011
# Attempts to accelerate the iteration xn+1 = xn + β f(xn)
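# Anderson(m) forms the next iterate as a linear combination of the m most
# recent g(x) = x + β*f(x) values, with mixing coefficients obtained from a
# least-squares fit on the residuals (see Walker & Ni for the derivation)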

@views function anderson_{T}(df::AbstractDifferentiableMultivariateFunction,
                             x0::Vector{T},
                             xtol::T,
                             ftol::T,
                             iterations::Integer,
                             store_trace::Bool,
                             show_trace::Bool,
                             extended_trace::Bool,
                             m::Integer,
                             β::Real)

    f_calls = 0
    N = length(x0)
    xs = zeros(T,N,m+1) # ring buffer storing the iterates, from newest to oldest
    gs = zeros(T,N,m+1) # ring buffer storing the g of the iterates, from newest to oldest
    residuals = zeros(T, N, m) # matrix of residuals used for the least-squares problem
    alphas = zeros(T, m) # coefficients obtained by least-squares
    fx = similar(x0) # temp variable to store f!
    xs[:,1] = x0
    errs = zeros(iterations)
    n = 1
    tr = SolverTrace()
    tracing = store_trace || show_trace || extended_trace
    old_x = xs[:,1]
    x_converged, f_converged, converged = false, false, false

    for n = 1:iterations
        # fixed-point iteration
        df.f!(xs[:,1], fx)
        f_calls += 1
        gs[:,1] .= xs[:,1] .+ β.*fx
        x_converged, f_converged, converged = assess_convergence(gs[:,1], old_x, fx, xtol, ftol)

        # FIXME: How should this flag be set?
        mayterminate = false

        if tracing
            dt = Dict()
            if extended_trace
                dt["x"] = copy(xs[:,1])
                dt["f(x)"] = copy(fx)
            end
            update!(tr,
                    n,
                    maximum(abs,fx),
                    n > 1 ? sqeuclidean(xs[:,1],old_x) : convert(T,NaN),
                    dt,
                    store_trace,
                    show_trace)
        end

        if converged
            break
        end

        # update of new_x
        m_eff = min(n-1,m)
        new_x = copy(gs[:,1])
        if m_eff > 0
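            # mixing coefficients: solve the linear least-squares problem
            # min ||residuals*alphas - (x - g(x))|| over the stored history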
            residuals[:, 1:m_eff] .= (gs[:,2:m_eff+1] .- xs[:,2:m_eff+1]) .- (gs[:,1] .- xs[:,1])
            alphas[1:m_eff] .= residuals[:,1:m_eff] \ (xs[:,1] .- gs[:,1])
            for i = 1:m_eff
                new_x .+= alphas[i].*(gs[:,i+1] .- gs[:,1])
            end
        end

        xs = circshift(xs,(0,1)) # no in-place circshift, unfortunately...
        gs = circshift(gs,(0,1))
        old_x = m > 1 ? xs[:,2] : copy(xs[:,1])
        xs[:,1] = new_x
    end

    # returning gs[:,1] rather than xs[:,1] would be better here if
    # xn+1 = xn + β*f(xn) is convergent, but then the convergence
    # criterion is not guaranteed to hold
    return SolverResults("Anderson m=$m β=$β",
                         x0, copy(xs[:,1]), maximum(abs,fx),
                         n, x_converged, xtol, f_converged, ftol, tr,
                         f_calls, 0)
end

function anderson{T}(df::AbstractDifferentiableMultivariateFunction,
                     initial_x::Vector{T},
                     xtol::Real,
                     ftol::Real,
                     iterations::Integer,
                     store_trace::Bool,
                     show_trace::Bool,
                     extended_trace::Bool,
                     m::Integer,
                     beta::Real)
    anderson_(df, initial_x, convert(T, xtol), convert(T, ftol), iterations, store_trace, show_trace, extended_trace, m, beta)
end
19 changes: 15 additions & 4 deletions src/nlsolve_func_defs.jl
@@ -9,7 +9,9 @@ function nlsolve{T}(df::AbstractDifferentiableMultivariateFunction,
                    extended_trace::Bool = false,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
-                   autoscale::Bool = true)
+                   autoscale::Bool = true,
+                   m::Integer = 0,
+                   beta::Real = 1.0)
    if extended_trace
        show_trace = true
    end
@@ -24,6 +26,9 @@ function nlsolve{T}(df::AbstractDifferentiableMultivariateFunction,
        trust_region(df, initial_x, xtol, ftol, iterations,
                     store_trace, show_trace, extended_trace, factor,
                     autoscale)
    elseif method == :anderson
        anderson(df, initial_x, xtol, ftol, iterations,
                 store_trace, show_trace, extended_trace, m, beta)
    else
        throw(ArgumentError("Unknown method $method"))
    end
@@ -41,12 +46,15 @@ function nlsolve{T}(f!::Function,
                    extended_trace::Bool = false,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
-                   autoscale::Bool = true)
+                   autoscale::Bool = true,
+                   m::Integer = 0,
+                   beta::Real = 1.0)
    nlsolve(DifferentiableMultivariateFunction(f!, g!),
            initial_x, method = method, xtol = xtol, ftol = ftol,
            iterations = iterations, store_trace = store_trace,
            show_trace = show_trace, extended_trace = extended_trace,
-           linesearch! = linesearch!, factor = factor, autoscale = autoscale)
+           linesearch! = linesearch!, factor = factor, autoscale = autoscale,
+           m = m, beta = beta)
end

function nlsolve{T}(f!::Function,
@@ -61,6 +69,8 @@ function nlsolve{T}(f!::Function,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
                    autoscale::Bool = true,
                    m::Integer = 0,
                    beta::Real = 1.0,
                    autodiff::Bool = false)
    if !autodiff
        df = DifferentiableMultivariateFunction(f!)
@@ -71,5 +81,6 @@ initial_x, method = method, xtol = xtol, ftol = ftol,
            initial_x, method = method, xtol = xtol, ftol = ftol,
            iterations = iterations, store_trace = store_trace,
            show_trace = show_trace, extended_trace = extended_trace,
-           linesearch! = linesearch!, factor = factor, autoscale = autoscale)
+           linesearch! = linesearch!, factor = factor, autoscale = autoscale,
+           m = m, beta = beta)
end
6 changes: 3 additions & 3 deletions src/utils.jl
@@ -9,9 +9,9 @@ end

wnorm{T}(w::AbstractVector{T}, x::AbstractVector{T}) = sqrt(wdot(w, x, w, x))

# argument types widened from ::Vector to ::AbstractArray so that the Anderson
# solver can pass the SubArray views created by @views in anderson.jl
-function assess_convergence(x::Vector,
-                            x_previous::Vector,
-                            f::Vector,
+function assess_convergence(x::AbstractArray,
+                            x_previous::AbstractArray,
+                            f::AbstractArray,
                            xtol::Real,
                            ftol::Real)
    x_converged, f_converged = false, false
6 changes: 6 additions & 0 deletions test/2by2.jl
@@ -44,6 +44,12 @@ r = nlsolve(df, [ -0.5f0; 1.4f0], method = :newton, linesearch! = LineSearches.b
@test converged(r)
@test norm(r.zero - [ 0; 1]) < 1e-6

# test local convergence of Anderson: close to a fixed point and with
# a small beta, f should be almost affine, in which case Anderson is
# equivalent to GMRES and should converge
r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 10, beta=.01)
@test converged(r)
@test norm(r.zero - [ 0; 1]) < 1e-8

# Tests of other linesearches are disabled; they are not stable across runs
#r = nlsolve(df, [ -0.5; 1.4], method = :newton, linesearch! = LineSearches.hz_linesearch!, ftol = 1e-6)
