WIP: complete overhaul #17

Merged
merged 14 commits on Jan 24, 2025
1 change: 1 addition & 0 deletions .JuliaFormatter.toml
@@ -0,0 +1 @@
style = "yas"
31 changes: 0 additions & 31 deletions .github/workflows/ci-julia-nightly.yml

This file was deleted.

53 changes: 46 additions & 7 deletions .github/workflows/ci.yml
@@ -1,7 +1,18 @@
name: CI
on:
- push
- pull_request
push:
branches:
- 'master'
- 'main'
- 'release-'
tags: '*'
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
@@ -10,23 +21,51 @@ jobs:
fail-fast: false
matrix:
version:
- '1.4'
- '1' # automatically expands to the latest stable 1.x release of Julia
- '1.8' # minimum version because of ScopedValues.jl
- '1' # expands to latest version
- 'lts' # expands to latest LTS version
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
file: lcov.info
test-nightly:
needs: test
name: Julia nightly - ${{ matrix.os }} - ${{ matrix.arch }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
version:
- 'nightly'
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
Comment on lines +48 to +71

Does it really make sense to test on nightly versions of Julia? I would say that, since we are not using any unusual language features, simply testing on the regular releases should be sufficient. (Of course, it also does not hurt much, so I'm definitely okay with leaving this in.)

16 changes: 12 additions & 4 deletions Project.toml
@@ -1,18 +1,26 @@
name = "OptimKit"
uuid = "77e91f04-9b3b-57a6-a776-40b61faaebe0"
authors = ["Jutho Haegeman"]
version = "0.3.2"
version = "0.4.0"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"

[compat]
julia = "1"
Aqua = "0.8"
LinearAlgebra = "1"
Printf = "1"
Random = "1"
ScopedValues = "1"
Test = "1"
julia = "1.8"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Random"]
test = ["Test", "Random", "Aqua"]
80 changes: 64 additions & 16 deletions src/OptimKit.jl
@@ -1,29 +1,44 @@
module OptimKit

import LinearAlgebra
using LinearAlgebra: LinearAlgebra
using Printf
using ScopedValues
using Base: @kwdef

# Default values for the keyword arguments using ScopedValues
const LS_MAXITER = ScopedValue(10)
const LS_MAXFG = ScopedValue(20)
const LS_VERBOSITY = ScopedValue(1)
Comment on lines +8 to +11

While this is definitely the most convenient, I think the recommended way of working with several related options is to bundle them in a single struct. The main reason is that the underlying data structure scales (logarithmically, if I'm not mistaken) with the nesting depth, and these two are equivalent:

with(LS_MAXITER => 3, LS_MAXFG => 3) do 
end
with(LS_MAXITER => 3) do
    with(LS_MAXFG => 3) do
    end 
end

i.e. it would create a doubly nested scope. Bundling them is slightly more annoying for the user, since you would have to specify all three options even when changing a single one. Again, this is not crucial, and I'm definitely okay with leaving this as is.
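
For illustration only, a minimal sketch of the bundling suggested here, using a single ScopedValue that holds all three line-search options; the struct name `LineSearchDefaults` and the constant `LS_DEFAULTS` are hypothetical and not part of this PR:

using ScopedValues

Base.@kwdef struct LineSearchDefaults
    maxiter::Int = 10
    maxfg::Int = 20
    verbosity::Int = 1
end

# A single ScopedValue for the whole bundle, so overriding several options
# at once still introduces only one dynamic scope.
const LS_DEFAULTS = ScopedValue(LineSearchDefaults())

# Changing one option requires restating the others (or copying them from
# the current value), which is the user-side inconvenience mentioned above.
with(LS_DEFAULTS => LineSearchDefaults(; maxiter=3, maxfg=3)) do
    # ... code that picks up the overridden line-search defaults ...
end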


const GRADTOL = ScopedValue(1e-8)
const MAXITER = ScopedValue(1_000_000)
const VERBOSITY = ScopedValue(1)

_retract(x, d, α) = (x + α * d, d)
_inner(x, v1, v2) = v1 === v2 ? LinearAlgebra.norm(v1)^2 : LinearAlgebra.dot(v1, v2)
_transport!(v, xold, d, α, xnew) = v
_scale!(v, α) = LinearAlgebra.rmul!(v, α)
_add!(vdst, vsrc, α) = LinearAlgebra.axpy!(α, vsrc, vdst)

_precondition(x, g) = g
_precondition(x, g) = deepcopy(g)
_finalize!(x, f, g, numiter) = x, f, g
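
As a non-authoritative illustration of the interface these fallbacks define (`retract(x, d, α)` returns the retracted point together with the local tangent of the retraction curve, and `inner(x, v1, v2)` returns a scalar), here is a sketch of what user-supplied rules might look like on the unit sphere; the names `sphere_retract` and `sphere_inner` are made up for this example:

using LinearAlgebra

# Move along d and renormalize back onto the unit sphere; the second return
# value is the derivative of the retracted curve with respect to α.
function sphere_retract(x, d, α)
    y = x + α * d
    nrm = norm(y)
    xnew = y / nrm
    dnew = d / nrm - xnew * (dot(xnew, d) / nrm)
    return xnew, dnew
end

# The ambient Euclidean inner product, restricted to tangent vectors at x.
sphere_inner(x, v1, v2) = dot(v1, v2)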

abstract type OptimizationAlgorithm
end
abstract type OptimizationAlgorithm end

const _xlast = Ref{Any}()
const _glast = Ref{Any}()
const _dlast = Ref{Any}()

"""
x, f, g, numfg, history =
optimize(fg, x, algorithm; retract = _retract, inner = _inner,
transport! = _transport!, scale! = _scale!, add! = _add!,
isometrictransport = (transport! == _transport! && inner == _inner))
optimize(fg, x, alg;
precondition=_precondition,
(finalize!)=_finalize!,
hasconverged=DefaultHasConverged(alg.gradtol),
shouldstop=DefaultShouldStop(alg.maxiter),
retract=_retract, inner=_inner, (transport!)=_transport!,
(scale!)=_scale!, (add!)=_add!,
isometrictransport=(transport! == _transport! && inner == _inner))
-> x, f, g, numfg, history

Optimize (minimize) the objective function returned as the first value of `fg`, where the
second value contains the gradient, starting from a point `x` and using the algorithm
@@ -33,11 +48,44 @@ Returns the final point `x`, the corresponding function value `f` and gradient `g`, the
total number of calls to `fg`, and the history of the gradient norm across the different
iterations.

Check the README of this package for further details on creating an algorithm instance, as well as for the meaning of the keyword arguments and their default values.
The algorithm is run until either `hasconverged(x, f, g, norm(g))` returns `true` or
`shouldstop(x, f, g, numfg, numiter, time)` returns `true`. If the latter happens before
the former, this is considered a failure to converge and a warning is issued.

The keyword arguments are:

- `precondition::Function`: A function that takes the current point `x` and the gradient `g`
and returns a preconditioned gradient. By default, the gradient is returned unchanged (up to a `deepcopy`).
- `finalize!::Function`: A function that takes the current point `x`, the function value `f`,
the gradient `g`, and the iteration number, and returns possibly modified values for
`x`, `f` and `g`. By default, the identity is used.
It is the user's responsibility to ensure that the modified values do not lead to
inconsistencies within the optimization algorithm.
- `hasconverged::Function`: A function that takes the current point `x`, the function value `f`,
the gradient `g`, and the norm of the gradient, and returns a boolean indicating whether
the optimization has converged. By default, the norm of the gradient is compared to the
tolerance `gradtol` as encoded in the algorithm instance.
- `shouldstop::Function`: A function that takes the current point `x`, the function value `f`,
the gradient `g`, the number of calls to `fg`, the iteration number, and the time spent
so far, and returns a boolean indicating whether the optimization should stop. By default,
the number of iterations is compared to the maximum number of iterations as encoded in the
algorithm instance.

Check the README of this package for further details on creating an algorithm instance,
as well as for the meaning of the remaining keyword arguments and their default values.

!!! warning

The default values of `hasconverged` and `shouldstop` are provided to ensure continuity
with the previous versions of this package. However, this behaviour might change in the
future.

Also see [`GradientDescent`](@ref), [`ConjugateGradient`](@ref), [`LBFGS`](@ref).
"""
function optimize end
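
As an illustrative sketch only, the `hasconverged` and `shouldstop` hooks documented above can be supplied as plain functions with the stated signatures; the toy objective and the `LBFGS` keyword constructor shown here are assumptions and have not been checked against this PR:

using OptimKit

fg(x) = (sum(abs2, x) / 2, x)   # toy objective f(x) = ‖x‖²/2 with gradient x
x0 = randn(10)

alg = LBFGS(; gradtol=1e-10, maxiter=100)   # hypothetical constructor call

# Custom termination hooks with the documented signatures.
converged(x, f, g, normg) = normg < 1e-10
stop(x, f, g, numfg, numiter, t) = numiter >= 100

x, f, g, numfg, history = optimize(fg, x0, alg;
                                   hasconverged=converged, shouldstop=stop)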

include("linesearches.jl")
include("terminate.jl")
include("gd.jl")
include("cg.jl")
include("lbfgs.jl")
@@ -61,23 +109,23 @@ Test the compatibility between the computation of the gradient, the retraction a

It is up to the user to check that the values in `dfs1` and `dfs2` match up to expected precision, by inspecting the numerical values or plotting them. If these values don't match, the linesearch in `optimize` cannot be expected to work.
"""
function optimtest(fg, x, d = fg(x)[2]; alpha = -0.1:0.001:0.1, retract = _retract, inner = _inner)
function optimtest(fg, x, d=fg(x)[2]; alpha=-0.1:0.001:0.1, retract=_retract, inner=_inner)
f0, g0 = fg(x)
fs = Vector{typeof(f0)}(undef, length(alpha)-1)
fs = Vector{typeof(f0)}(undef, length(alpha) - 1)
dfs1 = similar(fs, length(alpha) - 1)
dfs2 = similar(fs, length(alpha) - 1)
for i = 1:length(alpha) - 1
for i in 1:(length(alpha) - 1)
a1 = alpha[i]
a2 = alpha[i+1]
a2 = alpha[i + 1]
f1, = fg(retract(x, d, a1)[1])
f2, = fg(retract(x, d, a2)[1])
dfs1[i] = (f2-f1)/(a2 - a1)
xmid, dmid = retract(x, d, (a1+a2)/2)
dfs1[i] = (f2 - f1) / (a2 - a1)
xmid, dmid = retract(x, d, (a1 + a2) / 2)
fmid, gmid = fg(xmid)
fs[i] = fmid
dfs2[i] = inner(xmid, dmid, gmid)
end
alphas = collect((alpha[2:end] + alpha[1:end-1])/2)
alphas = collect((alpha[2:end] + alpha[1:(end - 1)]) / 2)
return alphas, fs, dfs1, dfs2
end
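
A brief, non-authoritative usage sketch of `optimtest`, following the signature above; the toy objective is made up and the tolerance in the final check is arbitrary, one possible way of comparing `dfs1` and `dfs2`:

using OptimKit

fg(x) = (sum(abs2, x) / 2, x)   # toy objective f(x) = ‖x‖²/2 with gradient x
x0 = randn(10)

alphas, fs, dfs1, dfs2 = optimtest(fg, x0; alpha=-0.1:0.001:0.1)

# dfs1 holds finite-difference slopes of f along the retraction curve and
# dfs2 the inner products of the gradient with the retraction tangent; the
# two should agree up to discretization error when gradient, retract and
# inner are mutually consistent.
@assert maximum(abs, dfs1 - dfs2) < 1e-6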
