Skip to content

Commit

Permalink
Fix dbg for remote nodes
Browse files Browse the repository at this point in the history
dbg uses the new trace sessions and stores them in the main dbg server
on the node that's doing the tracing. However, trace sessions are
reference-counted NIF resources - this means that when the last reference
is GC'ed on the node that owns the resource, the resouce is destroyed.

In case of dbg, this meant that as soon as the first GC happened
on the relay process, the session would be destroyed and tracing would
silently stop. Furthermore, any subsequent requests to amend
trace patterns, etc would fail for the remote nodes with badarg
in trace module's functions.

This fixes the issue by making the relay process hold onto the session
reference for as long as the process is active. This solves the issue.

The amended test fails before applying this change and succeeds after.
  • Loading branch information
michalmuskala committed Jul 26, 2024
1 parent 2d11368 commit eb8a6bf
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 21 deletions.
37 changes: 20 additions & 17 deletions lib/runtime_tools/src/dbg.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2346,30 +2346,33 @@ relay(SessionName, Node,To) when Node /= node() ->
-doc false.
do_relay(SessionName, Parent,RelP) ->
process_flag(trap_exit, true),
case RelP of
{Type,Data} ->
{ok,Tracer} = remote_tracer(Type,Data),
Parent ! {started, Tracer, trace:session_create(SessionName, Tracer, [])},
ok;
Pid when is_pid(Pid) ->
Parent ! {started, self(), trace:session_create(SessionName, self(), [])},
ok
end,
do_relay_1(RelP).
do_relay_1(RelP) ->
%% In the case of a port tracer, this process exists only so that
%% dbg know that the node is alive... should maybe use monitor instead?
Tracer =
case RelP of
{Type, Data} ->
{ok, Started} = remote_tracer(Type,Data),
Started;
Pid when is_pid(Pid) ->
self()
end,
Session = trace:session_create(SessionName, Tracer, []),
Parent ! {started, Tracer, Session},
do_relay_1(RelP, Session).
do_relay_1(RelP, Session) ->
%% This process exists to relay messages and keep trace session
%% alive - session is ref-counted locally, meaning just the main
%% node keeping a reference to the session won't prevent the session
%% from being garbage collected.
receive
{'EXIT', _P, _} ->
exit(normal);
TraceInfo when is_pid(RelP) -> % Here is the normal case for trace i/o
RelP ! TraceInfo,
do_relay_1(RelP);
RelP ! TraceInfo,
do_relay_1(RelP, Session);
Other ->
Modifier = modifier(user),
io:format(user,"** relay got garbage: ~"++Modifier++"p~n", [Other]),
do_relay_1(RelP)
do_relay_1(RelP, Session)
end.
-doc false.
Expand Down
9 changes: 5 additions & 4 deletions lib/runtime_tools/test/dbg_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -409,17 +409,18 @@ distributed(Config) when is_list(Config) ->
{value, {matched, Node, 1}} = lists:keysearch(Node, 2, Z),
dbg:cn(Node),
dbg:tp(dbg,ln,[]),
ok = rpc:block_call(Node, dbg, ltp, []),
ok = rpc:block_call(Node, dbg, ln, []),
ok = dbg:ln(),
[ok = rpc:block_call(Node, dbg, ltp, []) || _ <- lists:seq(1,100)],
ok = rpc:block_call(Node, dbg, ln, []),
S = self(),
{TraceSend, TraceCall} =
lists:partition(fun ({trace,RP,send,_,_}) when RP =:= RexPid -> true;
(_) -> false end,
flush()),
[_|_] = TraceSend,
[{trace,Pid,call,{dbg,ltp,[]}},
{trace,S,call,{dbg,ln,[]}}] = TraceCall,
[{trace,S,call,{dbg,ln,[]}} | RemoteCall] = TraceCall,
100 = length(RemoteCall),
[{trace,Pid,call,{dbg,ltp,[]}}] = lists:uniq(RemoteCall),
Node = node(Pid),
%%
stop()
Expand Down

0 comments on commit eb8a6bf

Please sign in to comment.