Skip to content

Commit

Permalink
Add a watcher mechanism to detect when Distributed might be in use
Browse files Browse the repository at this point in the history
This should help users figure out if one of their packages is using Distributed and
another is using DistributedNext.
  • Loading branch information
JamesWrigley committed Dec 3, 2024
1 parent c68111a commit fe0e79a
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"

[compat]
Distributed = "1"
Random = "1"
Serialization = "1"
Sockets = "1"
julia = "1.9"

[extras]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
LibSSH = "00483490-30f8-4353-8aba-35b82f51f4d0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra", "Test", "LibSSH"]
test = ["LinearAlgebra", "Test", "LibSSH", "Distributed"]
5 changes: 5 additions & 0 deletions docs/src/_changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ This documents notable changes in DistributedNext.jl. The format is based on

## [v1.0.0] - 2024-12-02

### Added
- A watcher mechanism has been added to detect when both the Distributed stdlib
and DistributedNext may be active and adding workers. This should help prevent
incompatibilities from both libraries being used simultaneously ([#10]).

### Fixed
- Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the
side-effect of taking an element from the channel ([#3]).
Expand Down
29 changes: 29 additions & 0 deletions src/DistributedNext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ export
# Used only by shared arrays.
check_same_host

function _check_distributed_active()
# Find the Distributed module if it's been loaded
distributed_pkgid = Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed")
if !haskey(Base.loaded_modules, distributed_pkgid)
return false
end

if isdefined(Base.loaded_modules[distributed_pkgid].LPROC, :cookie) && inited[]
@warn "DistributedNext has detected that the Distributed stdlib may be in use. Be aware that these libraries are not compatible, you should use either one or the other."
return true
else
return false
end
end

function _require_callback(mod::Base.PkgId)
if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
# broadcast top-level (e.g. from Main) import/using from node 1 (only)
Expand Down Expand Up @@ -116,6 +131,20 @@ include("precompile.jl")

function __init__()
init_parallel()

# Start a task to watch for the Distributed stdlib being loaded and
# initialized to support multiple workers. We do this by checking if the
# cluster cookie has been set, which is most likely to have been done
# through Distributed.init_multi() being called by Distributed.addprocs() or
# something.
watcher_task = Threads.@spawn while true
if _check_distributed_active()
return
end

sleep(1)
end
errormonitor(watcher_task)
end

end
2 changes: 1 addition & 1 deletion test/distributed_exec.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test, DistributedNext, Random, Serialization, Sockets
using DistributedNext, Random, Serialization, Sockets
import DistributedNext: launch, manage


Expand Down
22 changes: 22 additions & 0 deletions test/distributed_stdlib_detection.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@testset "Distributed.jl detection" begin
function get_stderr(cmd)
stderr_buf = IOBuffer()
run(pipeline(cmd; stderr=stderr_buf))
return String(take!(stderr_buf))
end

# Just loading Distributed should do nothing
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; @assert !DistributedNext._check_distributed_active()'`
@test isempty(get_stderr(cmd))

# Only one of the two being active should also do nothing
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); @assert !DistributedNext._check_distributed_active()'`
@test isempty(get_stderr(cmd))

cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; DistributedNext.init_multi(); @assert !DistributedNext._check_distributed_active()'`
@test isempty(get_stderr(cmd))

# But both being active at the same time should trigger a warning
cmd = `$test_exename $test_exeflags -e 'using Distributed, DistributedNext; Distributed.init_multi(); DistributedNext.init_multi(); @assert DistributedNext._check_distributed_active()'`
@test contains(get_stderr(cmd), "DistributedNext has detected that the Distributed stdlib may be in use")
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test

# Run the distributed test outside of the main driver since it needs its own
# set of dedicated workers.
include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
Expand All @@ -18,3 +20,5 @@ end
include("distributed_exec.jl")

include("managers.jl")

include("distributed_stdlib_detection.jl")

0 comments on commit fe0e79a

Please sign in to comment.