Skip to content

Commit

Permalink
Add a watcher mechanism to detect when Distributed might be in use
Browse files Browse the repository at this point in the history
This should help users figure out if one of their packages is using Distributed and
another is using DistributedNext.
  • Loading branch information
JamesWrigley committed Nov 26, 2024
1 parent 13ce55b commit ec045ae
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 2 deletions.
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"

[compat]
Distributed = "1"

[extras]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LibSSH = "00483490-30f8-4353-8aba-35b82f51f4d0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra", "Test", "LibSSH"]
test = ["LinearAlgebra", "Test", "LibSSH", "Distributed"]
5 changes: 5 additions & 0 deletions docs/src/_changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ This documents notable changes in DistributedNext.jl. The format is based on

## Unreleased

### Added
- A watcher mechanism has been added to detect when both the Distributed stdlib
and DistributedNext may be active and adding workers. This should help prevent
incompatibilities from both libraries being used simultaneously ([#10]).

### Fixed
- Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the
side-effect of taking an element from the channel ([#3]).
Expand Down
28 changes: 28 additions & 0 deletions src/DistributedNext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,17 @@ export
# Used only by shared arrays.
check_same_host

distributed_module::Union{Module, Nothing} = nothing

function _find_distributed_stdlib()
# Find the Distributed module if it's been loaded
distributed_pkgid = Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed")
global distributed_module = get(Base.loaded_modules, distributed_pkgid, nothing)
end

function _require_callback(mod::Base.PkgId)
_find_distributed_stdlib()

if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
# broadcast top-level (e.g. from Main) import/using from node 1 (only)
@sync for p in procs()
Expand Down Expand Up @@ -116,6 +126,24 @@ include("precompile.jl")

function __init__()
init_parallel()

# Start a task to watch for the Distributed stdlib being loaded and
# initialized to support multiple workers. We do this by checking if the
# cluster cookie has been set, which is most likely to have been done
# through Distributed.init_multi() being called by Distributed.addprocs() or
# something.
_find_distributed_stdlib()
watcher_task = Threads.@spawn while true
if !isnothing(distributed_module)
if isdefined(distributed_module.LPROC, :cookie) && inited[]
@warn "DistributedNext has detected that the Distributed stdlib may be in use. Be aware that these libraries are not compatible, you should use either one or the other."
return
end
end

sleep(0.5)
end
errormonitor(watcher_task)
end

end
2 changes: 1 addition & 1 deletion test/distributed_exec.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test, DistributedNext, Random, Serialization, Sockets
using DistributedNext, Random, Serialization, Sockets
import DistributedNext: launch, manage


Expand Down
24 changes: 24 additions & 0 deletions test/distributed_stdlib_detection.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using DistributedNext

@testset "Distributed.jl detection" begin
# Just loading Distributed should do nothing
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; sleep(1)'`
stderr_buf = IOBuffer()
run(pipeline(cmd; stderr=stderr_buf))
stderr_str = String(take!(stderr_buf))
@test isempty(stderr_str)

# Only one of the two being active should also do nothing
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); sleep(1)'`
stderr_buf = IOBuffer()
run(pipeline(cmd; stderr=stderr_buf))
stderr_str = String(take!(stderr_buf))
@test isempty(stderr_str)

# But both being active at the same time should trigger a warning
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); DistributedNext.init_multi(); sleep(1)'`
stderr_buf = IOBuffer()
run(pipeline(cmd; stderr=stderr_buf))
stderr_str = String(take!(stderr_buf))
@test contains(stderr_str, "DistributedNext has detected that the Distributed stdlib may be in use")
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test

# Run the distributed test outside of the main driver since it needs its own
# set of dedicated workers.
include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
Expand All @@ -18,3 +20,5 @@ end
include("distributed_exec.jl")

include("managers.jl")

include("distributed_stdlib_detection.jl")

0 comments on commit ec045ae

Please sign in to comment.