Add second-order derivative functions #122

Merged 12 commits on Feb 5, 2024
3 changes: 3 additions & 0 deletions docs/src/implementer_guide.md
@@ -29,11 +29,14 @@ They are just listed here to help readers figure out the code structure:
- `derivative` calls `jacobian`
- `gradient` calls `jacobian`
- `hessian` calls `jacobian` and `gradient`
- `second_derivative` calls `derivative` (see the sketch after this list)
- `value_and_jacobian` calls `jacobian`
- `value_and_derivative` calls `value_and_jacobian`
- `value_and_gradient` calls `value_and_jacobian`
- `value_and_hessian` calls `jacobian` and `gradient`
- `value_and_second_derivative` calls `second_derivative`
- `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient`
- `value_derivative_and_second_derivative` calls `value_and_derivative` and `second_derivative`
- `pushforward_function` calls `jacobian`
- `value_and_pushforward_function` calls `pushforward_function`
- `pullback_function` calls `value_and_pullback_function`
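A minimal sketch of the call chain for the new second-order entries, assuming a ForwardDiff-based backend is loaded (the backend and function below are purely illustrative):

```julia
import AbstractDifferentiation as AD
using ForwardDiff

backend = AD.ForwardDiffBackend()
f(x) = exp(2x)

# For a backend without a dedicated second-order rule, `second_derivative`
# is computed by nesting `derivative` calls, which themselves fall back to `jacobian`.
# Like the other derivative functions, it returns a one-element tuple.
AD.second_derivative(backend, f, 0.5)  # ≈ (4 * exp(1.0),)
```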
7 changes: 5 additions & 2 deletions docs/src/user_guide.md
@@ -53,24 +53,27 @@ AbstractDifferentiation.HigherOrderBackend

## Derivatives

The following list of functions can be used to request the derivative, gradient, Jacobian or Hessian without the function value.
The following list of functions can be used to request the derivative, gradient, Jacobian, second derivative or Hessian without the function value.

```@docs
AbstractDifferentiation.derivative
AbstractDifferentiation.gradient
AbstractDifferentiation.jacobian
AbstractDifferentiation.second_derivative
AbstractDifferentiation.hessian
```
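
As a short, illustrative example of the new `second_derivative` entry (the ForwardDiff backend is used here only for concreteness):

```julia
import AbstractDifferentiation as AD
using ForwardDiff

backend = AD.ForwardDiffBackend()
f(x) = x^4

# Like `derivative`, the result is wrapped in a one-element tuple.
AD.second_derivative(backend, f, 2.0)  # (48.0,)
```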

## Value and derivatives

The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian or Hessian. You can also request the function value, its gradient and Hessian for single-input functions.
The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian, second derivative, or Hessian. You can also request the function value, its derivative (or its gradient) and its second derivative (or Hessian) for single-input functions.

```@docs
AbstractDifferentiation.value_and_derivative
AbstractDifferentiation.value_and_gradient
AbstractDifferentiation.value_and_jacobian
AbstractDifferentiation.value_and_second_derivative
AbstractDifferentiation.value_and_hessian
AbstractDifferentiation.value_derivative_and_second_derivative
AbstractDifferentiation.value_gradient_and_hessian
```
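
A brief sketch of the combined second-order queries for a single-input function (backend and function are again illustrative):

```julia
import AbstractDifferentiation as AD
using ForwardDiff

backend = AD.ForwardDiffBackend()
f(x) = x^4

v, d2 = AD.value_and_second_derivative(backend, f, 2.0)
# v == 16.0, d2 == (48.0,)

v, d, d2 = AD.value_derivative_and_second_derivative(backend, f, 2.0)
# v == 16.0, d == (32.0,), d2 == (48.0,)
```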

27 changes: 27 additions & 0 deletions ext/AbstractDifferentiationForwardDiffExt.jl
@@ -61,20 +61,47 @@ function AD.hessian(ba::AD.ForwardDiffBackend, f, x::AbstractArray)
return (ForwardDiff.hessian(f, x, cfg),)
end

function AD.value_and_derivative(::AD.ForwardDiffBackend, f, x::Real)
T = typeof(ForwardDiff.Tag(f, typeof(x)))
ydual = f(ForwardDiff.Dual{T}(x, one(x)))
return ForwardDiff.value(T, ydual), (ForwardDiff.partials(T, ydual, 1),)
end

function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray)
result = DiffResults.GradientResult(x)
cfg = ForwardDiff.GradientConfig(f, x, chunk(ba, x))
ForwardDiff.gradient!(result, f, x, cfg)
return DiffResults.value(result), (DiffResults.derivative(result),)
end

function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real)
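# Nested dual numbers: wrapping `x` in two layers of `ForwardDiff.Dual` (tags `T` and `T2`)
# lets a single evaluation of `f` carry both the value and the second-order partial.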
T = typeof(ForwardDiff.Tag(f, typeof(x)))
xdual = ForwardDiff.Dual{T}(x, one(x))
T2 = typeof(ForwardDiff.Tag(f, typeof(xdual)))
ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual)))
v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual))
d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1)
return v, (d2,)
end

function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x)
result = DiffResults.HessianResult(x)
cfg = ForwardDiff.HessianConfig(f, result, x, chunk(ba, x))
ForwardDiff.hessian!(result, f, x, cfg)
return DiffResults.value(result), (DiffResults.hessian(result),)
end

function AD.value_derivative_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real)
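# Same nested-dual approach as `value_and_second_derivative` above; the first-order
# partial is additionally read off the value of the outer dual layer.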
T = typeof(ForwardDiff.Tag(f, typeof(x)))
xdual = ForwardDiff.Dual{T}(x, one(x))
T2 = typeof(ForwardDiff.Tag(f, typeof(xdual)))
ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual)))
v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual))
d = ForwardDiff.partials(T, ForwardDiff.value(T2, ydual), 1)
d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1)
return v, (d,), (d2,)
end

@inline step_toward(x::Number, v::Number, h) = x + h * v
# support arrays and tuples
@noinline step_toward(x, v, h) = x .+ h .* v
53 changes: 52 additions & 1 deletion src/AbstractDifferentiation.jl
@@ -85,6 +85,24 @@
return jacobian(lowest(ab), f, xs...)
end

"""
AD.second_derivative(ab::AD.AbstractBackend, f, x)

Compute the second derivative of `f` with respect to the input `x` using the backend `ab`.

The function returns a single value because `second_derivative` currently only supports a single input.
"""
function second_derivative(ab::AbstractBackend, f, x)
if x isa Tuple
# only support computation of second derivative for functions with single input argument
x = only(x)
end
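# The outer call uses `second_lowest(ab)` so that a `HigherOrderBackend` can pair two
# different backends for the outer and inner differentiation; for a plain backend both
# calls use the same backend.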
return derivative(second_lowest(ab), x -> begin
d = derivative(lowest(ab), f, x)
return d[1] # derivative returns a tuple
end, x)
end

"""
AD.hessian(ab::AD.AbstractBackend, f, x)

@@ -139,12 +157,23 @@
return value, jacs
end

"""
AD.value_and_second_derivative(ab::AD.AbstractBackend, f, x)

Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivative `d2 = AD.second_derivative(ab, f, x)`.

See also [`AbstractDifferentiation.second_derivative`](@ref).
"""
function value_and_second_derivative(ab::AbstractBackend, f, x)
return f(x), second_derivative(ab, f, x)
end

"""
AD.value_and_hessian(ab::AD.AbstractBackend, f, x)

Return the tuple `(v, H)` of the function value `v = f(x)` and the Hessian `H = AD.hessian(ab, f, x)`.

See also [`AbstractDifferentiation.hessian`](@ref).
"""
function value_and_hessian(ab::AbstractBackend, f, x)
if x isa Tuple
@@ -176,6 +205,28 @@
return value, hess
end

"""
AD.value_derivative_and_second_derivative(ab::AD.AbstractBackend, f, x)

Return the tuple `(v, d, d2)` of the function value `v = f(x)`, the first derivative `d = AD.derivative(ab, f, x)`, and the second derivative `d2 = AD.second_derivative(ab, f, x)`.
"""
function value_derivative_and_second_derivative(ab::AbstractBackend, f, x)
if x isa Tuple
# only support computation of the second derivative for functions with a single input argument
x = only(x)
end

value = f(x)
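# Applying `value_and_derivative` to the inner-derivative closure yields the first
# derivative (as the closure's value) and the second derivative in one pass.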
deriv, secondderiv = value_and_derivative(
second_lowest(ab), _x -> begin
d = derivative(lowest(ab), f, _x)
return d[1] # derivative returns a tuple
end, x
)

return value, (deriv,), secondderiv
end

"""
AD.value_gradient_and_hessian(ab::AD.AbstractBackend, f, x)

3 changes: 3 additions & 0 deletions test/finitedifferences.jl
@@ -21,6 +21,9 @@ using FiniteDifferences
@testset "Jacobian" begin
test_jacobians(backend)
end
@testset "Second derivative" begin
test_second_derivatives(backend)
end
@testset "Hessian" begin
test_hessians(backend)
end
3 changes: 3 additions & 0 deletions test/forwarddiff.jl
@@ -19,6 +19,9 @@ using ForwardDiff
@testset "Jacobian" begin
test_jacobians(backend)
end
@testset "Second derivative" begin
test_second_derivatives(backend)
end
@testset "Hessian" begin
test_hessians(backend)
end
3 changes: 3 additions & 0 deletions test/reversediff.jl
@@ -14,6 +14,9 @@ using ReverseDiff
@testset "Jacobian" begin
test_jacobians(backend)
end
@testset "Second derivative" begin
test_second_derivatives(backend)
end
@testset "Hessian" begin
test_hessians(backend)
end
3 changes: 3 additions & 0 deletions test/ruleconfig.jl
@@ -21,6 +21,9 @@ using Zygote
@testset "j′vp" begin
test_j′vp(backend)
end
@testset "Second derivative" begin
test_second_derivatives(backend)
end
@testset "Lazy Derivative" begin
test_lazy_derivatives(backend)
end
39 changes: 39 additions & 0 deletions test/test_utils.jl
@@ -6,6 +6,7 @@ Random.seed!(1234)
fder(x, y) = exp(y) * x + y * log(x)
dfderdx(x, y) = exp(y) + y * 1 / x
dfderdy(x, y) = exp(y) * x + log(x)
dfderdxdx(x, y) = -y / x^2

fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
dfgraddx(x, y) = prod(x) ./ x
@@ -143,6 +144,44 @@ function test_jacobians(backend; multiple_inputs=true, test_types=true)
@test yvec == yvec2
end

function test_second_derivatives(backend; test_types=true)
# explicit test that AbstractDifferentiation throws an error:
# passing a tuple of inputs is not supported for second derivatives
@test_throws ArgumentError AD.second_derivative(
backend, x -> fder(x, yscalar), (xscalar, yscalar)
)
@test_throws MethodError AD.second_derivative(
backend, x -> fder(x, yscalar), xscalar, yscalar
)

# test that a single input (not wrapped in a tuple) works
dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar)
if test_types
@test only(dder1) isa Float64
end
@test dfderdxdx(xscalar, yscalar) ≈ only(dder1) atol = 1e-8
valscalar, dder2 = AD.value_and_second_derivative(
backend, x -> fder(x, yscalar), xscalar
)
if test_types
@test valscalar isa Float64
@test only(dder2) isa Float64
end
@test valscalar == fder(xscalar, yscalar)
@test dder2 == dder1
valscalar, der, dder3 = AD.value_derivative_and_second_derivative(
backend, x -> fder(x, yscalar), xscalar
)
if test_types
@test valscalar isa Float64
@test only(der) isa Float64
@test only(dder3) isa Float64
end
@test valscalar == fder(xscalar, yscalar)
@test der == AD.derivative(backend, x -> fder(x, yscalar), xscalar)
@test dder3 == dder1
end

function test_hessians(backend; multiple_inputs=false, test_types=true)
if multiple_inputs
# ... but