maximal flexibility
Jutho committed Jan 23, 2025
1 parent 6e823d4 commit 6037131
Showing 7 changed files with 130 additions and 84 deletions.
14 changes: 11 additions & 3 deletions Project.toml
@@ -1,18 +1,26 @@
name = "OptimKit"
uuid = "77e91f04-9b3b-57a6-a776-40b61faaebe0"
authors = ["Jutho Haegeman"]
version = "0.4"
version = "0.4.0"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"

[compat]
Aqua = "0.8"
LinearAlgebra = "1"
Printf = "1"
Random = "1"
ScopedValues = "1.3.0"
Test = "1"
julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Random"]
test = ["Test", "Random", "Aqua"]
10 changes: 10 additions & 0 deletions src/OptimKit.jl
@@ -2,8 +2,18 @@ module OptimKit

using LinearAlgebra: LinearAlgebra
using Printf
using ScopedValues
using Base: @kwdef

# Default values for the keyword arguments using ScopedValues
const LS_MAXITER = ScopedValue(10)
const LS_MAXFG = ScopedValue(20)
const LS_VERBOSITY = ScopedValue(1)

const GRADTOL = ScopedValue(1e-8)
const MAXITER = ScopedValue(1_000_000)
const VERBOSITY = ScopedValue(1)

_retract(x, d, α) = (x + α * d, d)
_inner(x, v1, v2) = v1 === v2 ? LinearAlgebra.norm(v1)^2 : LinearAlgebra.dot(v1, v2)
_transport!(v, xold, d, α, xnew) = v
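These `ScopedValue` constants are dynamically scoped defaults: the algorithm constructors below read them at call time, so code running inside a `with` block from ScopedValues.jl picks up the overridden values. A minimal sketch of the intended usage (assuming `ConjugateGradient` from src/cg.jl below; the constants are not exported, hence the `OptimKit.` prefix):

```julia
using OptimKit, ScopedValues

# Override two defaults for everything constructed inside this scope;
# the constructor reads OptimKit.GRADTOL[] and OptimKit.LS_VERBOSITY[]
# when it is called, so it sees 1e-10 and 2 instead of 1e-8 and 1.
alg = with(OptimKit.GRADTOL => 1e-10, OptimKit.LS_VERBOSITY => 2) do
    ConjugateGradient()
end
```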
53 changes: 32 additions & 21 deletions src/cg.jl
@@ -5,29 +5,41 @@ abstract type CGFlavor end
ConjugateGradient(;
flavor::CGFlavor=HagerZhang(),
restart::Int=typemax(Int),
maxiter::Int=typemax(Int),
gradtol::Real=1e-8,
verbosity::Int=1,
ls_verbosity::Int=1,
linesearch::AbstractLineSearch=HagerZhangLineSearch())
maxiter::Int=MAXITER[], # 1_000_000
gradtol::Real=GRADTOL[], # 1e-8
verbosity::Int=VERBOSITY[], # 1
ls_maxiter::Int=LS_MAXITER[], # 10
ls_maxfg::Int=LS_MAXFG[], # 20
ls_verbosity::Int=LS_VERBOSITY[], # 1
linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
ConjugateGradient optimization algorithm.
## Fields
- `flavor`: The flavor of the conjugate gradient algorithm (for selecting the β parameter)
## Parameters
- `flavor`: The flavor of the conjugate gradient algorithm (for selecting the β parameter; see below)
- `restart::Int`: The number of iterations after which to reset the search direction.
- `maxiter::Int`: The maximum number of iterations.
- `gradtol::T`: The tolerance for the norm of the gradient.
- `linesearch::L`: The line search algorithm to use.
- `verbosity::Int`: The verbosity level of the optimization algorithm.
- `ls_maxiter::Int`: The maximum number of iterations for the line search.
- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
- `ls_verbosity::Int`: The verbosity level of the line search algorithm.
- `linesearch`: The line search algorithm to use; if a custom value is provided,
it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
Both verbosity levels use the following scheme:
- 0: no output
- 1: only warnings upon non-convergence
- 2: convergence information at the end of the algorithm
- 3: progress information after each iteration
- 4: more detailed information (only for the linesearch)
The `flavor` parameter can take the values
- `HagerZhang(; η::Real=4 // 10, θ::Real=1 // 1)`: Hager-Zhang formula for β
- `HestenesStiefel(; pos = true)`: Hestenes-Stiefel formula for β
- `FletcherReeves()`: Fletcher-Reeves formula for β
- `PolakRibiere(; pos = true)`: Polak-Ribiere formula for β
- `DaiYuan()`: Dai-Yuan formula for β
"""
struct ConjugateGradient{F<:CGFlavor,T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
flavor::F
@@ -36,20 +48,21 @@ struct ConjugateGradient{F<:CGFlavor,T<:Real,L<:AbstractLineSearch} <: Optimizat
gradtol::T
verbosity::Int
linesearch::L
ls_maxiter::Int
ls_verbosity::Int
end
function ConjugateGradient(;
flavor::CGFlavor=HagerZhang(),
restart::Int=typemax(Int),
maxiter::Int=typemax(Int),
gradtol::Real=1e-8,
verbosity::Int=1,
ls_maxiter::Int=10,
ls_verbosity::Int=1,
linesearch::AbstractLineSearch=HagerZhangLineSearch())
return ConjugateGradient(flavor, restart, maxiter, gradtol, verbosity,
linesearch, ls_maxiter, ls_verbosity)
maxiter::Int=MAXITER[],
gradtol::Real=GRADTOL[],
verbosity::Int=VERBOSITY[],
ls_maxiter::Int=LS_MAXITER[],
ls_maxfg::Int=LS_MAXFG[],
ls_verbosity::Int=LS_VERBOSITY[],
linesearch::AbstractLineSearch=HagerZhangLineSearch(;
maxiter=ls_maxiter,
maxfg=ls_maxfg,
verbosity=ls_verbosity))
return ConjugateGradient(flavor, restart, maxiter, gradtol, verbosity, linesearch)
end

function optimize(fg, x, alg::ConjugateGradient;
@@ -118,9 +131,7 @@ function optimize(fg, x, alg::ConjugateGradient;
_dlast[] = η
x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
initialguess=α,
retract=retract, inner=inner,
maxiter=alg.ls_maxiter,
verbosity=alg.ls_verbosity)
retract=retract, inner=inner)
numfg += nfg
numiter += 1
x, f, g = finalize!(x, f, g, numiter)
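The reworked constructor supports two ways of configuring the line search, sketched below with arbitrary illustrative values (assuming the flavor types and `HagerZhangLineSearch` are exported, as in the package README):

```julia
using OptimKit

# Route 1: the ls_* keywords are forwarded into the default
# HagerZhangLineSearch built for this algorithm.
alg1 = ConjugateGradient(; flavor=PolakRibiere(), gradtol=1e-9,
                         ls_maxiter=20, ls_maxfg=40, ls_verbosity=2)

# Route 2: an explicitly supplied linesearch takes precedence and the
# ls_* keywords are ignored.
ls = HagerZhangLineSearch(; maxiter=30, maxfg=60, verbosity=3)
alg2 = ConjugateGradient(; linesearch=ls)
```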
49 changes: 26 additions & 23 deletions src/gd.jl
@@ -1,23 +1,27 @@
"""
struct GradientDescent{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
GradientDescent(;
maxiter = typemax(Int),
gradtol::Real = 1e-8,
verbosity::Int = 1,
ls_verbosity::Int = 1,
linesearch::AbstractLineSearch = HagerZhangLineSearch())
maxiter::Int=MAXITER[], # 1_000_000
gradtol::Real=GRADTOL[], # 1e-8
verbosity::Int=VERBOSITY[], # 1
ls_maxiter::Int=LS_MAXITER[], # 10
ls_maxfg::Int=LS_MAXFG[], # 20
ls_verbosity::Int=LS_VERBOSITY[], # 1
linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
Gradient Descent optimization algorithm.
## Fields
## Parameters
- `maxiter::Int`: The maximum number of iterations.
- `gradtol::T`: The tolerance for the norm of the gradient.
- `acceptfirst::Bool`: Whether to accept the first step of the line search.
- `linesearch::L`: The line search algorithm to use.
- `verbosity::Int`: The verbosity level of the optimization algorithm.
- `ls_maxiter::Int`: The maximum number of iterations for the line search.
- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
- `ls_verbosity::Int`: The verbosity level of the line search algorithm.
- `linesearch`: The line search algorithm to use; if a custom value is provided,
it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
Both verbosity levels use the following scheme:
Both `verbosity` and `ls_verbosity` use the following scheme:
- 0: no output
- 1: only warnings upon non-convergence
- 2: convergence information at the end of the algorithm
@@ -29,18 +33,19 @@ struct GradientDescent{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
gradtol::T
verbosity::Int
linesearch::L
ls_maxiter::Int
ls_verbosity::Int
end
function GradientDescent(;
maxiter::Int=typemax(Int),
gradtol::Real=1e-8,
verbosity::Int=1,
ls_maxiter::Int=10,
ls_verbosity::Int=1,
linesearch::AbstractLineSearch=HagerZhangLineSearch())
return GradientDescent(maxiter, gradtol, verbosity,
linesearch, ls_maxiter, ls_verbosity)
maxiter::Int=MAXITER[],
gradtol::Real=GRADTOL[],
verbosity::Int=VERBOSITY[],
ls_maxiter::Int=LS_MAXITER[],
ls_maxfg::Int=LS_MAXFG[],
ls_verbosity::Int=LS_VERBOSITY[],
linesearch::AbstractLineSearch=HagerZhangLineSearch(;
maxiter=ls_maxiter,
maxfg=ls_maxfg,
verbosity=ls_verbosity))
return GradientDescent(maxiter, gradtol, verbosity, linesearch)
end

function optimize(fg, x, alg::GradientDescent;
@@ -83,9 +88,7 @@ function optimize(fg, x, alg::GradientDescent;
_dlast[] = η
x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
initialguess=α,
retract=retract, inner=inner,
maxiter=alg.ls_maxiter,
verbosity=alg.ls_verbosity)
retract=retract, inner=inner)
numfg += nfg
numiter += 1
x, f, g = finalize!(x, f, g, numiter)
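A usage sketch under the new defaults; the quadratic objective is purely illustrative, and the return values of `optimize` follow the package README:

```julia
using OptimKit
using LinearAlgebra: norm

# fg returns the objective value together with its gradient.
fg(x) = (norm(x)^2 / 2, x)

x₀ = randn(4)
alg = GradientDescent(; gradtol=1e-10, ls_maxfg=40, verbosity=2)
x, fx, gx, numfg, normgradhistory = optimize(fg, x₀, alg)
```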
49 changes: 26 additions & 23 deletions src/lbfgs.jl
@@ -1,25 +1,29 @@
"""
struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
LBFGS(m::Int = 8;
maxiter = typemax(Int),
gradtol::Real = 1e-8,
acceptfirst::Bool = true,
verbosity::Int = 1,
ls_verbosity::Int = 1,
linesearch::AbstractLineSearch = HagerZhangLineSearch())
maxiter::Int=MAXITER[], # 1_000_000
gradtol::Real=GRADTOL[], # 1e-8
verbosity::Int=VERBOSITY[], # 1
ls_maxiter::Int=LS_MAXITER[], # 10
ls_maxfg::Int=LS_MAXFG[], # 20
ls_verbosity::Int=LS_VERBOSITY[], # 1
linesearch = HagerZhangLineSearch(maxiter=ls_maxiter, maxfg=ls_maxfg, verbosity=ls_verbosity))
LBFGS optimization algorithm.
## Fields
## Parameters
- `m::Int`: The number of previous iterations to store for the limited memory BFGS approximation.
- `maxiter::Int`: The maximum number of iterations.
- `gradtol::T`: The tolerance for the norm of the gradient.
- `acceptfirst::Bool`: Whether to accept the first step of the line search.
- `linesearch::L`: The line search algorithm to use.
- `verbosity::Int`: The verbosity level of the optimization algorithm.
- `acceptfirst::Bool`: Whether to accept the first step of the line search.
- `ls_maxiter::Int`: The maximum number of iterations for the line search.
- `ls_maxfg::Int`: The maximum number of function evaluations for the line search.
- `ls_verbosity::Int`: The verbosity level of the line search algorithm.
- `linesearch`: The line search algorithm to use; if a custom value is provided,
it overrides `ls_maxiter`, `ls_maxfg`, and `ls_verbosity`.
Both verbosity levels use the following scheme:
Both `verbosity` and `ls_verbosity` use the following scheme:
- 0: no output
- 1: only warnings upon non-convergence
- 2: convergence information at the end of the algorithm
@@ -33,19 +37,20 @@ struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
acceptfirst::Bool
verbosity::Int
linesearch::L
ls_maxiter::Int
ls_verbosity::Int
end
function LBFGS(m::Int=8;
maxiter::Int=typemax(Int),
gradtol::Real=1e-8,
acceptfirst::Bool=true,
verbosity::Int=1,
ls_maxiter::Int=10,
ls_verbosity::Int=1,
linesearch::AbstractLineSearch=HagerZhangLineSearch())
return LBFGS(m, maxiter, gradtol, acceptfirst, verbosity,
linesearch, ls_maxiter, ls_verbosity)
maxiter::Int=MAXITER[],
gradtol::Real=GRADTOL[],
verbosity::Int=VERBOSITY[],
ls_maxiter::Int=LS_MAXITER[],
ls_maxfg::Int=LS_MAXFG[],
ls_verbosity::Int=LS_VERBOSITY[],
linesearch::AbstractLineSearch=HagerZhangLineSearch(;
maxiter=ls_maxiter,
maxfg=ls_maxfg,
verbosity=ls_verbosity))
return LBFGS(m, maxiter, gradtol, acceptfirst, verbosity, linesearch)
end

function optimize(fg, x, alg::LBFGS;
@@ -103,9 +108,7 @@ function optimize(fg, x, alg::LBFGS;
initialguess=one(f),
acceptfirst=alg.acceptfirst,
# for some reason, line search seems to converge to solution alpha = 2 in most cases if acceptfirst = false. If acceptfirst = true, the initial value of alpha can immediately be accepted. This typically leads to a more erratic convergence of normgrad, but to less function evaluations in the end.
retract=retract, inner=inner,
maxiter=alg.ls_maxiter,
verbosity=alg.ls_verbosity)
retract=retract, inner=inner)
numfg += nfg
numiter += 1
x, f, g = finalize!(x, f, g, numiter)
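The memory size `m` remains positional while the line search limits move to keywords; a brief sketch with illustrative values:

```julia
using OptimKit

# Keep m = 16 history pairs; forward tighter limits to the default
# HagerZhangLineSearch via the new ls_* keywords.
alg = LBFGS(16; gradtol=1e-10, acceptfirst=false,
            ls_maxiter=20, ls_maxfg=40)
```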
23 changes: 16 additions & 7 deletions src/linesearches.jl
@@ -30,6 +30,9 @@ struct HagerZhangLineSearch{T<:Real} <: AbstractLineSearch
θ::T # parameter regulating the bisection step
γ::T # parameter triggering the bisection step, namely if bracket reduction rate is slower than `γ`
ρ::T # parameter controlling the initial bracket expansion rate
maxiter::Int # hard limit on the number of iterations
maxfg::Int # soft limit on the number of function evaluations
verbosity::Int # verbosity level
end

"""
@@ -57,16 +60,19 @@ function HagerZhangLineSearch(; c₁::Real=1 // 10,
ϵ::Real=1 // 10^6,
θ::Real=1 // 2,
γ::Real=2 // 3,
ρ::Real=5 // 1)
return HagerZhangLineSearch(promote(c₁, c₂, ϵ, θ, γ, ρ)...)
ρ::Real=5 // 1,
maxiter::Int=LS_MAXITER[],
maxfg::Int=LS_MAXFG[],
verbosity::Int=LS_VERBOSITY[])
return HagerZhangLineSearch(promote(c₁, c₂, ϵ, θ, γ, ρ)..., maxiter, maxfg, verbosity)
end

# implementation as function
"""
(ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀ = fg(x₀);
retract = _retract, inner = _inner,
initialguess = one(fg₀[1]), acceptfirst = false,
maxiter = 50, maxfuneval = 100, verbosity = 0)
maxiter = ls.maxiter, maxfg = ls.maxfg, verbosity = ls.verbosity)
Perform a Hager-Zhang line search to find a step length that satisfies the (approximate) Wolfe conditions.
@@ -84,7 +90,7 @@ Perform a Hager-Zhang line search to find a step length that satisfies the (appr
- `acceptfirst::Bool`: Parameter that controls whether the initial guess can be accepted if it satisfies the strong Wolfe conditions. Defaults to `false`, thus requiring
at least one line search iteration and one extra function evaluation.
- `maxiter::Int`: Hard limit on the number of iterations. Default is `50`.
- `maxfuneval::Int`: Soft limit on the number of function evaluations. Default is `100`.
- `maxfg::Int`: Soft limit on the number of function evaluations. Default is `100`.
- `verbosity::Int`: The verbosity level (see below). Default is `0`.
### Verbosity Levels
@@ -104,8 +110,11 @@ Perform a Hager-Zhang line search to find a step length that satisfies the (appr
"""
function (ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀=fg(x₀);
retract=_retract, inner=_inner,
initialguess::Real=one(fg₀[1]), acceptfirst::Bool=false,
maxiter::Int=50, maxfuneval::Int=100, verbosity::Int=0)
initialguess::Real=one(fg₀[1]),
acceptfirst::Bool=false,
maxiter::Int=ls.maxiter,
maxfg::Int=ls.maxfg,
verbosity::Int=ls.verbosity)
(f₀, g₀) = fg₀
ϕ₀ = f₀
dϕ₀ = inner(x₀, g₀, η₀)
@@ -134,7 +143,7 @@ function (ls::HagerZhangLineSearch)(fg, x₀, η₀, fg₀=fg(x₀);
@info @sprintf("Linesearch converged after %d iterations and %d function evaluations:\nα = %.2e, dϕ = %.2e, ϕ - ϕ₀ = %.2e",
k, numfg, α, dϕ, f - ϕ₀)
return x, f, g, ξ, α, numfg
elseif k == maxiter || numfg >= maxfuneval
elseif k >= maxiter || numfg >= maxfg
verbosity >= 1 &&
@warn @sprintf("Linesearch not converged after %d iterations and %d function evaluations:\nα = %.2e, dϕ = %.2e, ϕ - ϕ₀ = %.2e",
k, numfg, α, dϕ, f - ϕ₀)
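With the limits stored on the struct, a direct line search call no longer has to pass them as keywords, though the new signature still allows per-call overrides. A sketch reusing the toy `fg` from the GradientDescent example above:

```julia
using OptimKit
using LinearAlgebra: norm

fg(x) = (norm(x)^2 / 2, x)
x₀ = randn(4)
f₀, g₀ = fg(x₀)
η₀ = -g₀  # steepest-descent search direction

ls = HagerZhangLineSearch(; maxiter=20, maxfg=40, verbosity=2)
x, f, g, ξ, α, numfg = ls(fg, x₀, η₀, (f₀, g₀))            # struct defaults
x, f, g, ξ, α, numfg = ls(fg, x₀, η₀, (f₀, g₀); maxfg=10)  # per-call override
```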
