Skip to content

Commit bc8c543

Browse files
authored
Merge pull request #8 from tpapp/tp/fix-threaded-use
Don't preallocate GradientConfig in ForwardDiff backend by default
2 parents 1b828a3 + cba4985 commit bc8c543

File tree

4 files changed

+91
-21
lines changed

4 files changed

+91
-21
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ SimpleUnPack = "1"
3434

3535
[extras]
3636
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
37+
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
3738
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
3839
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
3940
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -43,4 +44,4 @@ Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
4344
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
4445

4546
[targets]
46-
test = ["BenchmarkTools", "Enzyme", "ForwardDiff", "Random", "ReverseDiff", "Test", "Tracker", "Zygote"]
47+
test = ["BenchmarkTools", "ComponentArrays", "Enzyme", "ForwardDiff", "Random", "ReverseDiff", "Test", "Tracker", "Zygote"]

ext/LogDensityProblemsADForwardDiffExt.jl

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,95 @@ end
1818
# Load DiffResults helpers
1919
include("DiffResults_helpers.jl")
2020

21-
struct ForwardDiffLogDensity{L, C} <: ADGradientWrapper
21+
struct ForwardDiffLogDensity{L, C <: ForwardDiff.Chunk, T <: Union{Nothing,ForwardDiff.Tag},
22+
G <: Union{Nothing,ForwardDiff.GradientConfig}} <: ADGradientWrapper
23+
"supports zero-order evaluation `logdensity(ℓ, x)`"
2224
ℓ::L
23-
gradientconfig::C
25+
"chunk size for ForwardDiff"
26+
chunk::C
27+
"tag, or `nothing` for the default"
28+
tag::T
29+
"gradient config, or `nothing` if created for each evaluation"
30+
gradient_config::G
2431
end
2532

2633
function Base.show(io::IO, ℓ::ForwardDiffLogDensity)
2734
print(io, "ForwardDiff AD wrapper for ", ℓ.ℓ,
28-
", w/ chunk size ", length(ℓ.gradientconfig.seeds))
35+
", w/ chunk size ", ForwardDiff.chunksize(ℓ.chunk))
2936
end
3037

3138
_chunk(chunk::ForwardDiff.Chunk) = chunk
3239
_chunk(chunk::Integer) = ForwardDiff.Chunk(chunk)
3340

3441
_default_chunk(ℓ) = _chunk(dimension(ℓ))
3542

36-
_default_gradientconfig(ℓ, chunk, ::Nothing) = _default_gradientconfig(ℓ, chunk, zeros(dimension(ℓ)))
37-
function _default_gradientconfig(ℓ, chunk, x::AbstractVector)
38-
return ForwardDiff.GradientConfig(Base.Fix1(logdensity, ℓ), x, _chunk(chunk))
43+
function Base.copy(fℓ::ForwardDiffLogDensity{L,C,T,<:ForwardDiff.GradientConfig}) where {L,C,T}
44+
@unpack ℓ, chunk, tag, gradient_config = fℓ
45+
ForwardDiffLogDensity(ℓ, chunk, tag, copy(gradient_config))
3946
end
4047

4148
"""
42-
ADgradient(:ForwardDiff, ℓ; x, chunk, gradientconfig)
43-
ADgradient(Val(:ForwardDiff), ℓ; x, chunk, gradientconfig)
49+
$(SIGNATURES)
50+
51+
Make a `ForwardDiff.GradientConfig` for function `f` and input `x`. `tag = nothing` generates the default tag.
52+
"""
53+
function _make_gradient_config(f::F, x, chunk, tag) where {F}
54+
c = _chunk(chunk)
55+
gradient_config = if tag ≡ nothing
56+
ForwardDiff.GradientConfig(f, x, c)
57+
else
58+
ForwardDiff.GradientConfig(f, x, c, tag)
59+
end
60+
gradient_config
61+
end
62+
63+
"""
64+
ADgradient(:ForwardDiff, ℓ; chunk, tag, x)
65+
ADgradient(Val(:ForwardDiff), ℓ; chunk, tag, x)
4466
4567
Wrap a log density that supports evaluation of `Value` to handle `ValueGradient`, using
4668
`ForwardDiff`.
4769
48-
Keywords are passed on to `ForwardDiff.GradientConfig` to customize the setup. In
49-
particular, chunk size can be set with a `chunk` keyword argument (accepting an integer or a
50-
`ForwardDiff.Chunk`), and the underlying vector used by `ForwardDiff` can be set with the
51-
`x` keyword argument (accepting an `AbstractVector`).
70+
Keyword arguments:
71+
72+
- `chunk` can be used to set the chunk size, an integer or a `ForwardDiff.Chunk`
73+
74+
- `tag` (default: `nothing`) can be used to set a tag for `ForwardDiff`
75+
76+
- `x` (default: `nothing`) will be used to preallocate a `ForwardDiff.GradientConfig` with
77+
the given vector. With the default, one is created for each evaluation.
78+
79+
Note that **pre-allocating a `ForwardDiff.GradientConfig` is not thread-safe**. You can
80+
[`copy`](@ref) the results for concurrent evaluation:
81+
```julia
82+
∇ℓ1 = ADgradient(:ForwardDiff, ℓ; x = zeros(dimension(ℓ)))
83+
∇ℓ2 = copy(∇ℓ1) # you can now use both, in different threads
84+
```
85+
86+
See also the ForwardDiff documentation regarding
87+
[`ForwardDiff.GradientConfig`](https://juliadiff.org/ForwardDiff.jl/stable/user/api/#Preallocating/Configuring-Work-Buffers)
88+
and [chunks and tags](https://juliadiff.org/ForwardDiff.jl/stable/user/advanced/).
5289
"""
5390
function ADgradient(::Val{:ForwardDiff}, ℓ;
54-
x::Union{Nothing,AbstractVector} = nothing,
5591
chunk::Union{Integer,ForwardDiff.Chunk} = _default_chunk(ℓ),
56-
gradientconfig::ForwardDiff.GradientConfig = _default_gradientconfig(ℓ, chunk, x))
57-
ForwardDiffLogDensity(ℓ, gradientconfig)
92+
tag::Union{Nothing,ForwardDiff.Tag} = nothing,
93+
x::Union{Nothing,AbstractVector} = nothing)
94+
gradient_config = if x ≡ nothing
95+
nothing
96+
else
97+
_make_gradient_config(Base.Fix1(logdensity, ℓ), x, chunk, tag)
98+
end
99+
ForwardDiffLogDensity(ℓ, chunk, tag, gradient_config)
58100
end
59101

60102
function logdensity_and_gradient(fℓ::ForwardDiffLogDensity, x::AbstractVector)
61-
@unpack ℓ, gradientconfig = fℓ
103+
@unpack ℓ, chunk, tag, gradient_config = fℓ
62104
buffer = _diffresults_buffer(x)
63-
result = ForwardDiff.gradient!(buffer, Base.Fix1(logdensity, ℓ), x, gradientconfig)
105+
ℓ′ = Base.Fix1(logdensity, ℓ)
106+
if gradient_config ≡ nothing
107+
gradient_config = _make_gradient_config(ℓ′, x, chunk, tag)
108+
end
109+
result = ForwardDiff.gradient!(buffer, ℓ′, x, gradient_config)
64110
_diffresults_extract(result)
65111
end
66112

src/LogDensityProblemsAD.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ using LogDensityProblems: LogDensityOrder
1111

1212
import SimpleUnPack
1313

14-
1514
#####
1615
##### AD wrappers --- interface and generic code
1716
#####
@@ -34,6 +33,8 @@ dimension(ℓ::ADGradientWrapper) = dimension(ℓ.ℓ)
3433

3534
Base.parent(ℓ::ADGradientWrapper) = ℓ.ℓ
3635

36+
Base.copy(x::ADGradientWrapper) = x # no-op, except for ForwardDiff
37+
3738
"""
3839
$(SIGNATURES)
3940
@@ -57,6 +58,10 @@ ADgradient(:ForwardDiff, P)
5758
and should mostly be equivalent if the compiler manages to fold the constant.
5859
5960
The function `parent` can be used to retrieve the original argument.
61+
62+
!!! note
63+
With the default options, automatic differentiation preserves thread-safety. See
64+
exceptions and workarounds in the docstring for each backend.
6065
"""
6166
ADgradient(kind::Symbol, P; kwargs...) = ADgradient(Val{kind}(), P; kwargs...)
6267

test/runtests.jl

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import LogDensityProblems: capabilities, dimension, logdensity
44
using LogDensityProblems: logdensity_and_gradient, LogDensityOrder
55
import ForwardDiff, Enzyme, Tracker, Zygote, ReverseDiff # backends
66
import BenchmarkTools # load the heuristic chunks code
7+
using ComponentArrays: ComponentVector # test with other vector types
78

89
struct EnzymeTestMode <: Enzyme.Mode end
910

@@ -115,13 +116,30 @@ end
115116
(test_logdensity(x), test_gradient(x))
116117
end
117118

118-
# Make sure that other types are supported.
119+
# preallocated gradient config
119120
x = randexp(Float32, 3)
120-
∇ℓ = ADgradient(:ForwardDiff, ℓ; x=x)
121+
∇ℓ = ADgradient(:ForwardDiff, ℓ; x = x)
121122
@test eltype(first(logdensity_and_gradient(∇ℓ, x))) === Float32
122123
@test @inferred(logdensity(∇ℓ, x)) ≈ test_logdensity(x)
123124
@test @inferred(logdensity_and_gradient(∇ℓ, x)) ≈
124125
(test_logdensity(x), test_gradient(x))
126+
@test @inferred(copy(∇ℓ)).gradient_config ≢ ∇ℓ.gradient_config
127+
end
128+
129+
@testset "component vectors" begin
130+
# test with something else than `Vector`
131+
# cf https://github.com/tpapp/LogDensityProblemsAD.jl/pull/3
132+
ℓ = TestLogDensity()
133+
∇ℓ = ADgradient(:ForwardDiff, ℓ)
134+
x = zeros(3)
135+
y = ComponentVector(x = x)
136+
@test @inferred(logdensity(∇ℓ, y)) ≈ test_logdensity(x)
137+
@test @inferred(logdensity_and_gradient(∇ℓ, y)) ≈
138+
(test_logdensity(x), test_gradient(x))
139+
∇ℓ2 = ADgradient(:ForwardDiff, ℓ; x = y) # preallocate GradientConfig
140+
@test @inferred(logdensity(∇ℓ2, y)) ≈ test_logdensity(x)
141+
@test @inferred(logdensity_and_gradient(∇ℓ2, y)) ≈
142+
(test_logdensity(x), test_gradient(x))
125143
end
126144

127145
@testset "chunk heuristics for ForwardDiff" begin

0 commit comments

Comments
 (0)