Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove lower bound on size of epsilon #131

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions src/derivative.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ function derivative(f, ftype::Symbol, dtype::Symbol)
if ftype == :scalar
return x::Number -> finite_difference(f, float(x), dtype)
elseif ftype == :vector
return x::Vector -> finite_difference(f, float(x), dtype)
return x::AbstractVector -> finite_difference(f, float(x), dtype)
else
error("ftype must :scalar or :vector")
end
Expand Down Expand Up @@ -35,11 +35,11 @@ else
Base.ctranspose(f::Function) = derivative(f)
end

function jacobian(f, x::Vector{T}, dtype::Symbol) where T <: Number
function jacobian(f, x::AbstractVector{T}, dtype::Symbol) where T <: Number
finite_difference_jacobian(f, x, dtype)
end
function jacobian(f, dtype::Symbol)
g(x::Vector) = finite_difference_jacobian(f, x, dtype)
g(x::AbstractVector) = finite_difference_jacobian(f, x, dtype)
return g
end
jacobian(f) = jacobian(f, :central)
Expand All @@ -48,21 +48,21 @@ function second_derivative(f, g, ftype::Symbol, dtype::Symbol)
if ftype == :scalar
return x::Number -> finite_difference_hessian(f, g, x, dtype)
elseif ftype == :vector
return x::Vector -> finite_difference_hessian(f, g, x, dtype)
return x::AbstractVector -> finite_difference_hessian(f, g, x, dtype)
else
error("ftype must :scalar or :vector")
end
end
function second_derivative(f, g, x::Union{T, Vector{T}}, dtype::Symbol) where T <: Number
function second_derivative(f, g, x::Union{T, AbstractVector{T}}, dtype::Symbol) where T <: Number
finite_difference_hessian(f, g, x, dtype)
end
function hessian(f, g, x::Union{T, Vector{T}}, dtype::Symbol) where T <: Number
function hessian(f, g, x::Union{T, AbstractVector{T}}, dtype::Symbol) where T <: Number
finite_difference_hessian(f, g, x, dtype)
end
function second_derivative(f, g, x::Union{T, Vector{T}}) where T <: Number
function second_derivative(f, g, x::Union{T, AbstractVector{T}}) where T <: Number
finite_difference_hessian(f, g, x, :central)
end
function hessian(f, g, x::Union{T, Vector{T}}) where T <: Number
function hessian(f, g, x::Union{T, AbstractVector{T}}) where T <: Number
finite_difference_hessian(f, g, x, :central)
end
function second_derivative(f, x::Number, dtype::Symbol)
Expand All @@ -71,10 +71,10 @@ end
function hessian(f, x::Number, dtype::Symbol)
finite_difference_hessian(f, derivative(f), x, dtype)
end
function second_derivative(f, x::Vector{T}, dtype::Symbol) where T <: Number
function second_derivative(f, x::AbstractVector{T}, dtype::Symbol) where T <: Number
finite_difference_hessian(f, gradient(f), x, dtype)
end
function hessian(f, x::Vector{T}, dtype::Symbol) where T <: Number
function hessian(f, x::AbstractVector{T}, dtype::Symbol) where T <: Number
finite_difference_hessian(f, gradient(f), x, dtype)
end
function second_derivative(f, x::Number)
Expand All @@ -83,10 +83,10 @@ end
function hessian(f, x::Number)
finite_difference_hessian(f, derivative(f), x, :central)
end
function second_derivative(f, x::Vector{T}) where T <: Number
function second_derivative(f, x::AbstractVector{T}) where T <: Number
finite_difference_hessian(f, gradient(f), x, :central)
end
function hessian(f, x::Vector{T}) where T <: Number
function hessian(f, x::AbstractVector{T}) where T <: Number
finite_difference_hessian(f, gradient(f), x, :central)
end
second_derivative(f, g, dtype::Symbol) = second_derivative(f, g, :scalar, dtype)
Expand Down
57 changes: 25 additions & 32 deletions src/finite_difference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@
macro forwardrule(x, e)
x, e = esc(x), esc(e)
quote
$e = sqrt(eps(eltype($x))) * max(one(eltype($x)), abs($x))
$e = sqrt(eps($x^2))
end
end

macro centralrule(x, e)
x, e = esc(x), esc(e)
quote
$e = cbrt(eps(eltype($x))) * max(one(eltype($x)), abs($x))
$e = cbrt(eps($x^3))
end
end

macro hessianrule(x, e)
x, e = esc(x), esc(e)
quote
$e = eps(eltype($x))^(1/4) * max(one(eltype($x)), abs($x))
$e = eps($x^4)^(1/4)
end
end

Expand Down Expand Up @@ -231,38 +231,31 @@ end
##############################################################################

function finite_difference_hessian!(f,
x::AbstractVector{S},
H::Array{T}) where {S <: Number,
H::Array{T},
x::AbstractVector{S}) where {S <: Number,
T <: Number}
# What is the dimension of x?
n = length(x)

epsilon = NaN
# TODO: Remove all these copies
xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x)
fx = f(x)
for i = 1:n
xi = x[i]
@hessianrule x[i] epsilon
xpp[i], xmm[i] = xi + epsilon, xi - epsilon
H[i, i] = (f(xpp) - 2*fx + f(xmm)) / epsilon^2
@centralrule x[i] epsiloni
xp = xi + epsiloni
xm = xi - epsiloni
xpp[i], xpm[i], xmp[i], xmm[i] = xp, xp, xm, xm
for j = i+1:n
xj = x[j]
@centralrule x[j] epsilonj
xp = xj + epsilonj
xm = xj - epsilonj
xpp[j], xpm[j], xmp[j], xmm[j] = xp, xm, xp, xm
H[i, j] = (f(xpp) - f(xpm) - f(xmp) + f(xmm))/(4*epsiloni*epsilonj)
xpp[j], xpm[j], xmp[j], xmm[j] = xj, xj, xj, xj
n = size(H)[1]
e(j) = [i == j for i in 1:n] # j-th basis vector
hⱼ, hₖ = NaN, NaN
f₀ = f(x)
for j = 1:n
@hessianrule x[j] hⱼ # According to Numerical Recipes 5.7
f₊ = f(x + hⱼ * e(j))
f₋ = f(x - hⱼ * e(j))
H[j,j] = (f₋ - 2f₀ + f₊) / hⱼ^2
for k = j+1:n # Off-diagonal terms
@hessianrule x[k] hₖ # According to Numerical Recipes 5.7
f₊₊ = f(x + hⱼ * e(j) + hₖ * e(k))
f₊₋ = f(x + hⱼ * e(j) - hₖ * e(k))
f₋₊ = f(x - hⱼ * e(j) + hₖ * e(k))
f₋₋ = f(x - hⱼ * e(j) - hₖ * e(k))
H[j,k] = (f₊₊ - f₋₊ - f₊₋ + f₋₋) / (4 * hⱼ * hₖ)
j ≠ k ? H[k,j] = H[j,k] : nothing
end
xpp[i], xpm[i], xmp[i], xmm[i] = xi, xi, xi, xi
end
Compat.LinearAlgebra.copytri!(H,'U')
return H
end

function finite_difference_hessian(f,
x::AbstractVector{T}) where T <: Number
# What is the dimension of x?
Expand All @@ -272,7 +265,7 @@ function finite_difference_hessian(f,
H = Matrix{Float64}(undef, n, n)

# Mutate the allocated Hessian
finite_difference_hessian!(f, x, H)
finite_difference_hessian!(f, H, x)

# Return the Hessian
return H
Expand Down
48 changes: 28 additions & 20 deletions test/check_derivative.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
@test check_derivative(x -> sin(x), x -> cos(x), 0.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 1.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 10.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 100.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 1000.0) < 10e-4
@testset "check_derivative: f(x) = sin(x)" begin
@test check_derivative(x -> sin(x), x -> cos(x), 0.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 1.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 10.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 100.0) < 10e-4
@test check_derivative(x -> sin(x), x -> cos(x), 1000.0) < 10e-4
end

@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [0.0, 0.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [1.0, 1.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [10.0, 10.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [100.0, 100.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [1000.0, 1000.0]) < 10e-4
@testset "check_gradient: f(x) = sin(x[1]) + cos(x[2])" begin
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [0.1, 0.1]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [1.0, 1.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [10.0, 10.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [100.0, 100.0]) < 10e-4
@test check_gradient(x -> sin(x[1]) + cos(x[2]), x -> [cos(x[1]), -sin(x[2])], [1000.0, 1000.0]) < 10e-4
end

@test check_second_derivative(x -> sin(x), x -> -sin(x), 0.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 1.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 10.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 100.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 1000.0) < 10e-4
@testset "check_second_derivative: f(x) = sin(x)" begin
@test check_second_derivative(x -> sin(x), x -> -sin(x), 0.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 1.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 10.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 100.0) < 10e-4
@test check_second_derivative(x -> sin(x), x -> -sin(x), 1000.0) < 10e-4
end

@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [0.0, 0.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [1.0, 1.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [10.0, 10.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [100.0, 100.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [1000.0, 1000.0]) < 10e-4
@testset "check_hessian: f(x) = sin(x[1]) + cos(x[2])" begin
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [0.1, 0.1]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [1.0, 1.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [10.0, 10.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [100.0, 100.0]) < 10e-4
@test check_hessian(x -> sin(x[1]) + cos(x[2]), x -> [-sin(x[1]) 0.0; 0.0 -cos(x[2])], [1000.0, 1000.0]) < 10e-4
end
70 changes: 45 additions & 25 deletions test/derivative.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,75 @@
# derivative()
#

f1(x::Real) = sin(x)
@test norm(derivative(f1, :scalar, :forward)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :scalar, :central)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :forward)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :central)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1)(0.0) - cos(0.0)) < 10e-4
@testset "derivative()" begin
@testset "f(x::Real) = sin(x)" begin
f1(x::Real) = sin(x)
@test norm(derivative(f1, :scalar, :forward)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :scalar, :central)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :forward)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1, :central)(0.0) - cos(0.0)) < 10e-4
@test norm(derivative(f1)(0.0) - cos(0.0)) < 10e-4
end

f2(x::Vector) = sin(x[1])
@test norm(derivative(f2, :vector, :forward)([0.0]) .- cos(0.0)) < 10e-4
@test norm(derivative(f2, :vector, :central)([0.0]) .- cos(0.0)) < 10e-4
@testset "f(x::Vector) = sin(x[1])" begin
f2(x::Vector) = sin(x[1])
@test norm(derivative(f2, :vector, :forward)([0.0]) .- cos(0.0)) < 10e-4
@test norm(derivative(f2, :vector, :central)([0.0]) .- cos(0.0)) < 10e-4
end
end

#
# adjoint overloading
#

f3(x::Real) = sin(x)
for x in Compat.range(0.0, stop=0.1, length=11) # seq()
@test norm(f3'(x) - cos(x)) < 10e-4
@testset "adjoint overloading" begin
@testset "f(x::Vector) = sin(x[1])" begin
f3(x::Real) = sin(x)
for x in Compat.range(0.0, stop=0.1, length=11) # seq()
@test norm(f3'(x) - cos(x)) < 10e-4
end
end
end

#
# gradient()
#

f4(x::Vector) = (100.0 - x[1])^2 + (50.0 - x[2])^2
@test norm(Calculus.gradient(f4, :forward)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
@test norm(Calculus.gradient(f4, :central)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
@test norm(Calculus.gradient(f4)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
@testset "gradient()" begin
f4(x::Vector) = (100.0 - x[1])^2 + (50.0 - x[2])^2
@test norm(Calculus.gradient(f4, :forward)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
@test norm(Calculus.gradient(f4, :central)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
@test norm(Calculus.gradient(f4)([100.0, 50.0]) - [0.0, 0.0]) < 10e-4
end

#
# jacobian()
#

@test norm(Calculus.jacobian(identity, rand(3), :forward) - Matrix(1.0I, 3, 3)) < 10e-4
@test norm(Calculus.jacobian(identity, rand(3), :central) - Matrix(1.0I, 3, 3)) < 10e-4
@testset "jacobian()" begin
@test norm(Calculus.jacobian(identity, rand(3), :forward) - Matrix(1.0I, 3, 3)) < 10e-4
@test norm(Calculus.jacobian(identity, rand(3), :central) - Matrix(1.0I, 3, 3)) < 10e-4
end

#
# second_derivative()
#

@test norm(second_derivative(x -> x^2)(0.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(1.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(10.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(100.0) - 2.0) < 10e-4
@testset "second_derivative()" begin
@test norm(second_derivative(x -> x^2)(0.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(1.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(10.0) - 2.0) < 10e-4
@test norm(second_derivative(x -> x^2)(100.0) - 2.0) < 10e-4
end

#
# hessian()
#

f5(x) = sin(x[1]) + cos(x[2])
@test norm(Calculus.gradient(f5)([0.0, 0.0]) - [cos(0.0), -sin(0.0)]) < 10e-4
@test norm(hessian(f5)([0.0, 0.0]) - [-sin(0.0) 0.0; 0.0 -cos(0.0)]) < 10e-4
@testset "hessian()" begin
f5(x) = sin(x[1]) + cos(x[2])
x₁, x₂ = 1.0, 1.0
x = [x₁, x₂]
@test norm(Calculus.gradient(f5)(x) - [cos(x₁), -sin(x₂)]) < 10e-4
@test norm(hessian(f5)(x) - [-sin(x₁) 0.0; 0.0 -cos(x₂)]) < 10e-4
end
Loading