Basic implementation of Anderson acceleration (#101)
* add a basic implementation of Anderson acceleration and include a short description in README.md
antoine-levitt authored and pkofod committed Jul 1, 2017
1 parent e10aa1d commit fff21cf
Showing 6 changed files with 140 additions and 8 deletions.
20 changes: 19 additions & 1 deletion README.md
@@ -237,7 +237,7 @@ If `g!` conserves the sparsity structure of `gx`, `gx` will always have the sam

# Fine tunings

-Two algorithms are currently available. The choice between the two is achieved
+Three algorithms are currently available. The choice between these is achieved
by setting the optional `method` argument of `nlsolve`. The default algorithm
is the trust region method.

@@ -270,6 +270,23 @@ By default, no linesearch is performed.
**Note:** it is assumed that a passed linesearch function will at least update the solution
vector and evaluate the function at the new point.

## Anderson acceleration

Also known as DIIS or Pulay mixing, this method accelerates the
fixed-point iteration `xn+1 = xn + β f(xn)`, where `β = 1` by default.
It does not use Jacobian information or a linesearch, but keeps a
history of iterates whose size is controlled by the `m` parameter:
`m = 0` (the default) corresponds to the simple fixed-point iteration
above, and higher values use a larger history to accelerate the
iterations. Higher values of `m` usually speed up convergence, but
increase the storage and computation requirements and might lead to
instabilities. This method is useful for accelerating a fixed-point
iteration `xn+1 = g(xn)`, in which case it should be used with
`f(x) = g(x) - x`.

Reference: H. Walker and P. Ni, Anderson acceleration for fixed-point
iterations, SIAM Journal on Numerical Analysis, 2011.
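
As a quick sketch (using a toy fixed-point problem, `g(x) = cos(x)`, which is
not part of this package's examples), one would write `f(x) = g(x) - x` and
call the solver with `method = :anderson`:

```julia
using NLsolve

# a root of f(x) = cos(x) - x is a fixed point of g(x) = cos(x);
# f! follows the in-place f!(x, fx) convention used throughout NLsolve
function f!(x, fx)
    fx[1] = cos(x[1]) - x[1]
end

r = nlsolve(f!, [1.0], method = :anderson, m = 5, beta = 1.0)
```

With `m = 0` this reduces to the plain (damped) fixed-point iteration; larger
`m` turns on the least-squares extrapolation over the stored history.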

## Common options

Other optional arguments to `nlsolve`, available for all algorithms, are:
@@ -367,6 +384,7 @@ julia> fvec
* Broyden updating of Jacobian in trust-region
* Homotopy methods
* [LMMCP algorithm by C. Kanzow](http://www.mathematik.uni-wuerzburg.de/~kanzow/)
* QR updating of the least-squares problem in the Anderson acceleration solver

# Related Packages

1 change: 1 addition & 0 deletions src/NLsolve.jl
@@ -39,6 +39,7 @@ include("mcp_func_defs.jl")
include("utils.jl")
include("newton.jl")
include("trust_region.jl")
include("anderson.jl")
include("autodiff.jl")
include("mcp.jl")

96 changes: 96 additions & 0 deletions src/anderson.jl
@@ -0,0 +1,96 @@
# Notations from Walker & Ni, "Anderson acceleration for fixed-point iterations", SINUM 2011
# Attempts to accelerate the iteration xn+1 = xn + β f(xn)
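# Anderson(m) forms the next iterate as a linear combination of the m most
# recent g(x) = x + β*f(x) values, with mixing coefficients obtained from a
# least-squares fit on the residuals (see Walker & Ni for the derivation)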

@views function anderson_{T}(df::AbstractDifferentiableMultivariateFunction,
                             x0::Vector{T},
                             xtol::T,
                             ftol::T,
                             iterations::Integer,
                             store_trace::Bool,
                             show_trace::Bool,
                             extended_trace::Bool,
                             m::Integer,
                             β::Real)

    f_calls = 0
    N = length(x0)
    xs = zeros(T,N,m+1) # ring buffer storing the iterates, from newest to oldest
    gs = zeros(T,N,m+1) # ring buffer storing the g of the iterates, from newest to oldest
    residuals = zeros(T, N, m) # matrix of residuals used for the least-squares problem
    alphas = zeros(T, m) # coefficients obtained by least-squares
    fx = similar(x0) # temp variable to store f!
    xs[:,1] = x0
    errs = zeros(iterations)
    n = 1
    tr = SolverTrace()
    tracing = store_trace || show_trace || extended_trace
    old_x = xs[:,1]
    x_converged, f_converged, converged = false, false, false

    for n = 1:iterations
        # fixed-point iteration
        df.f!(xs[:,1], fx)
        f_calls += 1
        gs[:,1] .= xs[:,1] .+ β.*fx
        x_converged, f_converged, converged = assess_convergence(gs[:,1], old_x, fx, xtol, ftol)

        # FIXME: How should this flag be set?
        mayterminate = false

        if tracing
            dt = Dict()
            if extended_trace
                dt["x"] = copy(xs[:,1])
                dt["f(x)"] = copy(fx)
            end
            update!(tr,
                    n,
                    maximum(abs,fx),
                    n > 1 ? sqeuclidean(xs[:,1],old_x) : convert(T,NaN),
                    dt,
                    store_trace,
                    show_trace)
        end

        if converged
            break
        end

        # update of new_x
        m_eff = min(n-1,m)
        new_x = copy(gs[:,1])
        if m_eff > 0
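            # mixing coefficients: solve the linear least-squares problem
            # min ||residuals*alphas - (x - g(x))|| over the stored history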
            residuals[:, 1:m_eff] .= (gs[:,2:m_eff+1] .- xs[:,2:m_eff+1]) .- (gs[:,1] .- xs[:,1])
            alphas[1:m_eff] .= residuals[:,1:m_eff] \ (xs[:,1] .- gs[:,1])
            for i = 1:m_eff
                new_x .+= alphas[i].*(gs[:,i+1] .- gs[:,1])
            end
        end

        xs = circshift(xs,(0,1)) # no in-place circshift, unfortunately...
        gs = circshift(gs,(0,1))
        old_x = m > 1 ? xs[:,2] : copy(xs[:,1])
        xs[:,1] = new_x
    end

    # returning gs[:,1] rather than xs[:,1] would be better here if
    # xn+1 = xn + β*f(xn) is convergent, but then the convergence
    # criterion is not guaranteed to hold
    return SolverResults("Anderson m=$m β=$β",
                         x0, copy(xs[:,1]), maximum(abs,fx),
                         n, x_converged, xtol, f_converged, ftol, tr,
                         f_calls, 0)
end

function anderson{T}(df::AbstractDifferentiableMultivariateFunction,
                     initial_x::Vector{T},
                     xtol::Real,
                     ftol::Real,
                     iterations::Integer,
                     store_trace::Bool,
                     show_trace::Bool,
                     extended_trace::Bool,
                     m::Integer,
                     beta::Real)
    anderson_(df, initial_x, convert(T, xtol), convert(T, ftol), iterations, store_trace, show_trace, extended_trace, m, beta)
end
19 changes: 15 additions & 4 deletions src/nlsolve_func_defs.jl
@@ -9,7 +9,9 @@ function nlsolve{T}(df::AbstractDifferentiableMultivariateFunction,
                    extended_trace::Bool = false,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
-                   autoscale::Bool = true)
+                   autoscale::Bool = true,
+                   m::Integer = 0,
+                   beta::Real = 1.0)
    if extended_trace
        show_trace = true
    end
@@ -24,6 +26,9 @@ function nlsolve{T}(df::AbstractDifferentiableMultivariateFunction,
        trust_region(df, initial_x, xtol, ftol, iterations,
                     store_trace, show_trace, extended_trace, factor,
                     autoscale)
    elseif method == :anderson
        anderson(df, initial_x, xtol, ftol, iterations,
                 store_trace, show_trace, extended_trace, m, beta)
    else
        throw(ArgumentError("Unknown method $method"))
    end
@@ -41,12 +46,15 @@ function nlsolve{T}(f!::Function,
                    extended_trace::Bool = false,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
-                   autoscale::Bool = true)
+                   autoscale::Bool = true,
+                   m::Integer = 0,
+                   beta::Real = 1.0)
    nlsolve(DifferentiableMultivariateFunction(f!, g!),
            initial_x, method = method, xtol = xtol, ftol = ftol,
            iterations = iterations, store_trace = store_trace,
            show_trace = show_trace, extended_trace = extended_trace,
-           linesearch! = linesearch!, factor = factor, autoscale = autoscale)
+           linesearch! = linesearch!, factor = factor, autoscale = autoscale,
+           m = m, beta = beta)
end

function nlsolve{T}(f!::Function,
@@ -61,6 +69,8 @@ function nlsolve{T}(f!::Function,
                    linesearch!::Function = no_linesearch!,
                    factor::Real = one(T),
                    autoscale::Bool = true,
                    m::Integer = 0,
                    beta::Real = 1.0,
                    autodiff::Bool = false)
    if !autodiff
        df = DifferentiableMultivariateFunction(f!)
@@ -71,5 +81,6 @@ initial_x, method = method, xtol = xtol, ftol = ftol,
            initial_x, method = method, xtol = xtol, ftol = ftol,
            iterations = iterations, store_trace = store_trace,
            show_trace = show_trace, extended_trace = extended_trace,
-           linesearch! = linesearch!, factor = factor, autoscale = autoscale)
+           linesearch! = linesearch!, factor = factor, autoscale = autoscale,
+           m = m, beta = beta)
end
6 changes: 3 additions & 3 deletions src/utils.jl
@@ -9,9 +9,9 @@ end

wnorm{T}(w::AbstractVector{T}, x::AbstractVector{T}) = sqrt(wdot(w, x, w, x))

# argument types widened from ::Vector to ::AbstractArray so that the Anderson
# solver can pass the SubArray views created by @views in anderson.jl
-function assess_convergence(x::Vector,
-                            x_previous::Vector,
-                            f::Vector,
+function assess_convergence(x::AbstractArray,
+                            x_previous::AbstractArray,
+                            f::AbstractArray,
                            xtol::Real,
                            ftol::Real)
    x_converged, f_converged = false, false
6 changes: 6 additions & 0 deletions test/2by2.jl
@@ -44,6 +44,12 @@ r = nlsolve(df, [ -0.5f0; 1.4f0], method = :newton, linesearch! = LineSearches.b
@test converged(r)
@test norm(r.zero - [ 0; 1]) < 1e-6

# test local convergence of Anderson: close to a fixed point and with
# a small beta, f should be almost affine, in which case Anderson is
# equivalent to GMRES and should converge
r = nlsolve(df, [ 0.01; .99], method = :anderson, m = 10, beta=.01)
@test converged(r)
@test norm(r.zero - [ 0; 1]) < 1e-8

# Tests of other linesearches are disabled; they are not stable across runs
#r = nlsolve(df, [ -0.5; 1.4], method = :newton, linesearch! = LineSearches.hz_linesearch!, ftol = 1e-6)
