diff --git a/Project.toml b/Project.toml
index 9ab3094..26447ea 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,15 +8,17 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 
 [compat]
+Distributed = "1"
 Random = "1"
 Serialization = "1"
 Sockets = "1"
 julia = "1.9"
 
 [extras]
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 LibSSH = "00483490-30f8-4353-8aba-35b82f51f4d0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["LinearAlgebra", "Test", "LibSSH"]
+test = ["LinearAlgebra", "Test", "LibSSH", "Distributed"]
diff --git a/docs/src/_changelog.md b/docs/src/_changelog.md
index 3b18108..75c9ee1 100644
--- a/docs/src/_changelog.md
+++ b/docs/src/_changelog.md
@@ -9,6 +9,11 @@ This documents notable changes in DistributedNext.jl. The format is based on
 
 ## [v1.0.0] - 2024-12-02
 
+### Added
+- A watcher mechanism has been added to detect when both the Distributed stdlib
+  and DistributedNext may be active and adding workers. This should help prevent
+  incompatibilities from both libraries being used simultaneously ([#10]).
+
 ### Fixed
 - Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the
   side-effect of taking an element from the channel ([#3]).
diff --git a/src/DistributedNext.jl b/src/DistributedNext.jl
index ca0833b..841269c 100644
--- a/src/DistributedNext.jl
+++ b/src/DistributedNext.jl
@@ -72,6 +72,31 @@ export
 # Used only by shared arrays.
     check_same_host
 
+"""
+    _check_distributed_active() -> Bool
+
+Return `true` and log a warning when the Distributed stdlib is loaded, the
+`cookie` field of its `LPROC` object is defined, and DistributedNext's own
+`inited[]` flag is set. Return `false` in every other case.
+"""
+function _check_distributed_active()
+    # Find the Distributed module if it's been loaded. A single `get()` avoids
+    # looking the key up twice (once to test for it and once to fetch it).
+    distributed_pkgid = Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed")
+    distributed = get(Base.loaded_modules, distributed_pkgid, nothing)
+    if isnothing(distributed)
+        return false
+    end
+
+    if isdefined(distributed.LPROC, :cookie) && inited[]
+        @warn "DistributedNext has detected that the Distributed stdlib may be in use. Be aware that these libraries are not compatible, you should use either one or the other."
+        return true
+    else
+        return false
+    end
+end
+
 function _require_callback(mod::Base.PkgId)
     if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
         # broadcast top-level (e.g. from Main) import/using from node 1 (only)
@@ -116,6 +141,25 @@ include("precompile.jl")
 
 function __init__()
     init_parallel()
+
+    # Skip the watcher while Julia is generating output (e.g. precompiling a
+    # package that depends on this one): a never-ending polling task has no
+    # use there and can keep the precompilation process from finishing.
+    if ccall(:jl_generating_output, Cint, ()) == 0
+        # Start a task to watch for the Distributed stdlib being loaded and
+        # initialized to support multiple workers. We do this by checking if the
+        # cluster cookie has been set, which is most likely to have been done
+        # through Distributed.init_multi() being called by Distributed.addprocs() or
+        # something.
+        watcher_task = Threads.@spawn while true
+            if _check_distributed_active()
+                return
+            end
+
+            sleep(1)
+        end
+        errormonitor(watcher_task)
+    end
 end
 
 end
diff --git a/test/distributed_exec.jl b/test/distributed_exec.jl
index 106787b..c567d92 100644
--- a/test/distributed_exec.jl
+++ b/test/distributed_exec.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, DistributedNext, Random, Serialization, Sockets
+using DistributedNext, Random, Serialization, Sockets
 import DistributedNext: launch, manage
 
 
diff --git a/test/distributed_stdlib_detection.jl b/test/distributed_stdlib_detection.jl
new file mode 100644
index 0000000..f3fc14e
--- /dev/null
+++ b/test/distributed_stdlib_detection.jl
@@ -0,0 +1,22 @@
+@testset "Distributed.jl detection" begin
+    function get_stderr(cmd)
+        stderr_buf = IOBuffer()
+        run(pipeline(cmd; stderr=stderr_buf))
+        return String(take!(stderr_buf))
+    end
+
+    # Just loading Distributed should do nothing
+    cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; @assert !DistributedNext._check_distributed_active()'`
+    @test isempty(get_stderr(cmd))
+
+    # Only one of the two being active should also do nothing
+    cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); @assert !DistributedNext._check_distributed_active()'`
+    @test isempty(get_stderr(cmd))
+
+    cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; DistributedNext.init_multi(); @assert !DistributedNext._check_distributed_active()'`
+    @test isempty(get_stderr(cmd))
+
+    # But both being active at the same time should trigger a warning
+    cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); DistributedNext.init_multi(); @assert DistributedNext._check_distributed_active()'`
+    @test contains(get_stderr(cmd), "DistributedNext has detected that the Distributed stdlib may be in use")
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index f5f56c7..17f2b4f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Test
+
 # Run the distributed test outside of the main driver since it needs its own
 # set of dedicated workers.
 include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
@@ -18,3 +20,5 @@ end
 
 include("distributed_exec.jl")
 include("managers.jl")
+
+include("distributed_stdlib_detection.jl")