Skip to content

Commit

Permalink
compiler alias analysis: Speed up state pruning
Browse files Browse the repository at this point in the history
In around 80% of the cases when prune is called, more than half of the
nodes in the sharing state database survive. Therefore a pruning
strategy which removes nodes from the database has been used. This
patch resurrects the previously used pruning algorithm which instead
recreated the pruned state from scratch instead of removing nodes
which is faster when only a few nodes survive the pruning. The alias
analysis implementation is then extended to, for each pruned basic
block, by looking at the number of nodes in the database before and
after the prune, record which strategy is the most efficient. If the
basic block is revisited, the fastest prune implementation is
selected. On the modules compiled by the `scripts/diffable` tool, this
change reduces the time spent on alias analysis by close to six
percent.
  • Loading branch information
frej committed Aug 14, 2024
1 parent 80350f6 commit c6464c9
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 13 deletions.
60 changes: 47 additions & 13 deletions lib/compiler/src/beam_ssa_alias.erl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ fn(#b_local{name=#b_literal{val=N},arity=A}) ->
cnt = 0 :: non_neg_integer(),
%% Functions which have been analyzed at least once.
analyzed = sets:new() :: sets:set(func_id()),
run_count = #{} :: #{ func_id() => non_neg_integer() }
run_count = #{} :: #{ func_id() => non_neg_integer() },
prune_strategy = #{} :: #{ func_id() =>
#{beam_ssa:label() =>
'add' | 'del'} }
}).

%% A code location refering to either the #b_set{} defining a variable
Expand Down Expand Up @@ -393,15 +396,18 @@ aa_fixpoint([], Order, #aas{func_db=FuncDb,repeats=Repeats}=AAS, NoofIters) ->
end.

aa_fun(F, #opt_st{ssa=Linear0,args=Args},
AAS0=#aas{alias_map=AliasMap0,analyzed=Analyzed,kills=KillsMap}) ->
AAS0=#aas{alias_map=AliasMap0,analyzed=Analyzed,kills=KillsMap,
prune_strategy=StrategyMap0}) ->
%% Initially assume all formal parameters are unique for a
%% non-exported function, if we have call argument info in the
%% AAS, we use it. For an exported function, all arguments are
%% assumed to be aliased.
{SS0,Cnt} = aa_init_fun_ss(Args, F, AAS0),
#{F:={LiveIns,Kills,PhiLiveIns}} = KillsMap,
{SS,AAS1} = aa_blocks(Linear0, LiveIns, PhiLiveIns,
Kills, #{0=>SS0}, AAS0#aas{cnt=Cnt}),
Strategy0 = maps:get(F, StrategyMap0, #{}),
{SS,Strategy,AAS1} =
aa_blocks(Linear0, LiveIns, PhiLiveIns,
Kills, #{0=>SS0}, Strategy0, AAS0#aas{cnt=Cnt}),
Lbl2SS0 = maps:get(F, AliasMap0, #{}),
Type2Status0 = maps:get(returns, Lbl2SS0, #{}),
Type2Status = maps:get(returns, SS, #{}),
Expand All @@ -412,30 +418,58 @@ aa_fun(F, #opt_st{ssa=Linear0,args=Args},
AAS1
end,
AliasMap = AliasMap0#{ F => SS },
AAS#aas{alias_map=AliasMap,analyzed=sets:add_element(F, Analyzed)}.
StrategyMap = StrategyMap0#{F => Strategy},
AAS#aas{alias_map=AliasMap,analyzed=sets:add_element(F, Analyzed),
prune_strategy=StrategyMap}.

%% Main entry point for the alias analysis
aa_blocks([{?EXCEPTION_BLOCK,_}|Bs],
LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS) ->
LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS) ->
%% Nothing happening in the exception block can propagate to the
%% other block.
aa_blocks(Bs, LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS);
aa_blocks(Bs, LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS);
aa_blocks([{L,#b_blk{is=Is0,last=T}}|Bs0],
LiveIns, PhiLiveIns, Kills, Lbl2SS0, AAS0) ->
LiveIns, PhiLiveIns, Kills, Lbl2SS0, Strategy0, AAS0) ->
#{L:=SS0} = Lbl2SS0,
?DP("Block: ~p~nSS:~n~s~n", [L, beam_ssa_ss:dump(SS0)]),
{FullSS,AAS1} = aa_is(Is0, SS0, AAS0),
#{{live_outs,L}:=LiveOut} = Kills,
#{{killed_in_block,L}:=KilledInBlock} = Kills,
{Lbl2SS1,Successors} = aa_terminator(T, FullSS, Lbl2SS0),
PrunedSS = beam_ssa_ss:prune(LiveOut, KilledInBlock, FullSS),
%% In around 80% of the cases when prune is called, more than half
%% of the nodes in the sharing state database survive. Therefore
%% we default to a pruning strategy which removes nodes from the
%% database. But if only a few nodes survive it is faster to
%% recreate the pruned state from scratch. We therefore track the
%% result of a previous prune for the current basic block and
%% select the, hopefully, best pruning strategy.
Before = beam_ssa_ss:size(FullSS),
S = maps:get(L, Strategy0, del),
PrunedSS =
case S of
del ->
#{{killed_in_block,L}:=KilledInBlock} = Kills,
beam_ssa_ss:prune(LiveOut, KilledInBlock, FullSS);
add ->
beam_ssa_ss:prune_by_add(LiveOut, FullSS)
end,
After = beam_ssa_ss:size(PrunedSS),
Strategy = case After < (Before div 2) of
true when S =:= add ->
Strategy0;
false when S =:= del ->
Strategy0;
true ->
Strategy0#{L => add};
false ->
Strategy0#{L => del}
end,
?DP("Live out from ~p: ~p~n", [L, sets:to_list(LiveOut)]),
Lbl2SS2 = aa_add_block_entry_ss(Successors, L, PrunedSS,
LiveOut, LiveIns, PhiLiveIns, Lbl2SS1),
Lbl2SS = aa_set_block_exit_ss(L, FullSS, Lbl2SS2),
aa_blocks(Bs0, LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS1);
aa_blocks([], _LiveIns, _PhiLiveIns, _Kills, Lbl2SS, AAS) ->
{Lbl2SS,AAS}.
aa_blocks(Bs0, LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS1);
aa_blocks([], _LiveIns, _PhiLiveIns, _Kills, Lbl2SS, Strategy, AAS) ->
{Lbl2SS, Strategy, AAS}.

aa_is([_I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) ->
?DP("I: ~p~n", [_I]),
Expand Down
58 changes: 58 additions & 0 deletions lib/compiler/src/beam_ssa_ss.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@
new/3,
phi/4,
prune/3,
prune_by_add/2,
set_call_result/4,
set_status/3,
size/1,
variables/1]).

-include("beam_ssa.hrl").
Expand Down Expand Up @@ -520,6 +522,58 @@ is_safe_to_prune(V, LiveVars, State) ->
end
end.

%%%
%%% As prune/3, but doing the pruning by rebuilding the surviving
%%% state from scratch.
%%%
%%% Throws `too_deep` if the depth of sharing state value chains
%%% exceeds SS_DEPTH_LIMIT.
%%%
-spec prune_by_add(sets:set(beam_ssa:b_var()), sharing_state())
-> sharing_state().
prune_by_add(LiveVars, State) ->
?assert_state(State),
?DP("Pruning to ~p~n", [sets:to_list(LiveVars)]),
?DP("~s~n", [dump(State)]),
?DP("Vertices: ~p~n", [beam_digraph:vertices(State)]),
R = prune_by_add([{0,V} || V <- sets:to_list(LiveVars),
beam_digraph:has_vertex(State, V)],
[], new(), State),
?DP("Pruned result~n~s~n", [dump(R)]),
?assert_state(R).

prune_by_add([{Depth0,V}|Wanted], Edges, New0, Old) ->
?DP("Looking at ~p~n", [V]),
?DP("Curr:~n~s~n", [dump(New0)]),
?DP("Wanted: ~p~n", [Wanted]),
case beam_digraph:has_vertex(New0, V) of
true ->
%% This variable is already added.
prune_by_add(Wanted, Edges, New0, Old);
false when Depth0 < ?SS_DEPTH_LIMIT ->
%% This variable has to be kept. Add it to the new graph.
New = add_vertex(New0, V, beam_digraph:vertex(Old, V)),
%% Add all incoming edges to this node.
InEdges = beam_digraph:in_edges(Old, V),
Depth = Depth0 + 1,
InNodes = [{Depth, From} || {From,_,_} <- InEdges],
prune_by_add(InNodes ++ Wanted, InEdges ++ Edges, New, Old);
false ->
%% We're in too deep, give up. This case will probably
%% never be hit as it would require a previous prune/3
%% application which doesn't hit the depth limit and for
%% it to remove more than half of the nodes to trigger the
%% use of prune_by_add/2, and in a later iteration trigger
%% the depth limit. As it cannot be definitely ruled out,
%% take the hit against the coverage statistics, as the
%% handling code in beam_ssa_alias is tested.
throw(too_deep)
end;
prune_by_add([], Edges, New0, _Old) ->
foldl(fun({Src,Dst,Lbl}, New) ->
add_edge(New, Src, Dst, Lbl)
end, New0, Edges).

-spec set_call_result(beam_ssa:b_var(), call_in_arg_status(),
sharing_state(), non_neg_integer()) ->
{sharing_state(),non_neg_integer()}.
Expand Down Expand Up @@ -604,6 +658,10 @@ get_alias_edges(V, State) ->
end],
EmbedEdges ++ OutEdges.

-spec size(sharing_state()) -> non_neg_integer().
size(State) ->
beam_digraph:no_vertices(State).

-spec variables(sharing_state()) -> [beam_ssa:b_var()].
variables(State) ->
[V || {V,_Lbl} <- beam_digraph:vertices(State)].
Expand Down

0 comments on commit c6464c9

Please sign in to comment.