From 88b843824688e94b9f9c776925bc8203ebf0daca Mon Sep 17 00:00:00 2001 From: Frej Drejhammar Date: Thu, 25 Jul 2024 09:59:52 +0200 Subject: [PATCH] compiler alias analysis: Speed up state pruning In around 80% of the cases when prune is called, more than half of the nodes in the sharing state database survive. Therefore a pruning strategy which removes nodes from the database has been used. This patch resurrects the previously used pruning algorithm which instead recreated the pruned state from scratch instead of removing nodes which is faster when only a few nodes survive the pruning. The alias analysis implementation is then extended to, for each pruned basic block, by looking at the number of nodes in the database before and after the prune, record which strategy is the most efficient. If the basic block is revisited, the fastest prune implementation is selected. On the modules compiled by the `scripts/diffable` tool, this change reduces the time spent on alias analysis by close to six percent. --- lib/compiler/src/beam_ssa_alias.erl | 60 ++++++++++++++++++++++------- lib/compiler/src/beam_ssa_ss.erl | 58 ++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 13 deletions(-) diff --git a/lib/compiler/src/beam_ssa_alias.erl b/lib/compiler/src/beam_ssa_alias.erl index d34a81f3b55e..12eb2f513811 100644 --- a/lib/compiler/src/beam_ssa_alias.erl +++ b/lib/compiler/src/beam_ssa_alias.erl @@ -62,7 +62,10 @@ fn(#b_local{name=#b_literal{val=N},arity=A}) -> cnt = 0 :: non_neg_integer(), %% Functions which have been analyzed at least once. analyzed = sets:new() :: sets:set(func_id()), - run_count = #{} :: #{ func_id() => non_neg_integer() } + run_count = #{} :: #{ func_id() => non_neg_integer() }, + prune_strategy = #{} :: #{ func_id() => + #{beam_ssa:label() => + 'add' | 'del'} } }). %% A code location refering to either the #b_set{} defining a variable @@ -393,15 +396,18 @@ aa_fixpoint([], Order, #aas{func_db=FuncDb,repeats=Repeats}=AAS, NoofIters) -> end. aa_fun(F, #opt_st{ssa=Linear0,args=Args}, - AAS0=#aas{alias_map=AliasMap0,analyzed=Analyzed,kills=KillsMap}) -> + AAS0=#aas{alias_map=AliasMap0,analyzed=Analyzed,kills=KillsMap, + prune_strategy=StrategyMap0}) -> %% Initially assume all formal parameters are unique for a %% non-exported function, if we have call argument info in the %% AAS, we use it. For an exported function, all arguments are %% assumed to be aliased. {SS0,Cnt} = aa_init_fun_ss(Args, F, AAS0), #{F:={LiveIns,Kills,PhiLiveIns}} = KillsMap, - {SS,AAS1} = aa_blocks(Linear0, LiveIns, PhiLiveIns, - Kills, #{0=>SS0}, AAS0#aas{cnt=Cnt}), + Strategy0 = maps:get(F, StrategyMap0, #{}), + {SS,Strategy,AAS1} = + aa_blocks(Linear0, LiveIns, PhiLiveIns, + Kills, #{0=>SS0}, Strategy0, AAS0#aas{cnt=Cnt}), Lbl2SS0 = maps:get(F, AliasMap0, #{}), Type2Status0 = maps:get(returns, Lbl2SS0, #{}), Type2Status = maps:get(returns, SS, #{}), @@ -412,30 +418,58 @@ aa_fun(F, #opt_st{ssa=Linear0,args=Args}, AAS1 end, AliasMap = AliasMap0#{ F => SS }, - AAS#aas{alias_map=AliasMap,analyzed=sets:add_element(F, Analyzed)}. + StrategyMap = StrategyMap0#{F => Strategy}, + AAS#aas{alias_map=AliasMap,analyzed=sets:add_element(F, Analyzed), + prune_strategy=StrategyMap}. %% Main entry point for the alias analysis aa_blocks([{?EXCEPTION_BLOCK,_}|Bs], - LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS) -> + LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS) -> %% Nothing happening in the exception block can propagate to the %% other block. - aa_blocks(Bs, LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS); + aa_blocks(Bs, LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS); aa_blocks([{L,#b_blk{is=Is0,last=T}}|Bs0], - LiveIns, PhiLiveIns, Kills, Lbl2SS0, AAS0) -> + LiveIns, PhiLiveIns, Kills, Lbl2SS0, Strategy0, AAS0) -> #{L:=SS0} = Lbl2SS0, ?DP("Block: ~p~nSS:~n~s~n", [L, beam_ssa_ss:dump(SS0)]), {FullSS,AAS1} = aa_is(Is0, SS0, AAS0), #{{live_outs,L}:=LiveOut} = Kills, - #{{killed_in_block,L}:=KilledInBlock} = Kills, {Lbl2SS1,Successors} = aa_terminator(T, FullSS, Lbl2SS0), - PrunedSS = beam_ssa_ss:prune(LiveOut, KilledInBlock, FullSS), + %% In around 80% of the cases when prune is called, more than half + %% of the nodes in the sharing state database survive. Therefore + %% we default to a pruning strategy which removes nodes from the + %% database. But if only a few nodes survive it is faster to + %% recreate the pruned state from scratch. We therefore track the + %% result of a previous prune for the current basic block and + %% select the, hopefully, best pruning strategy. + Before = beam_ssa_ss:size(FullSS), + S = maps:get(L, Strategy0, del), + PrunedSS = + case S of + del -> + #{{killed_in_block,L}:=KilledInBlock} = Kills, + beam_ssa_ss:prune(LiveOut, KilledInBlock, FullSS); + add -> + beam_ssa_ss:prune_by_add(LiveOut, FullSS) + end, + After = beam_ssa_ss:size(PrunedSS), + Strategy = case After < (Before div 2) of + true when S =:= add -> + Strategy0; + false when S =:= del -> + Strategy0; + true -> + Strategy0#{L => add}; + false -> + Strategy0#{L => del} + end, ?DP("Live out from ~p: ~p~n", [L, sets:to_list(LiveOut)]), Lbl2SS2 = aa_add_block_entry_ss(Successors, L, PrunedSS, LiveOut, LiveIns, PhiLiveIns, Lbl2SS1), Lbl2SS = aa_set_block_exit_ss(L, FullSS, Lbl2SS2), - aa_blocks(Bs0, LiveIns, PhiLiveIns, Kills, Lbl2SS, AAS1); -aa_blocks([], _LiveIns, _PhiLiveIns, _Kills, Lbl2SS, AAS) -> - {Lbl2SS,AAS}. + aa_blocks(Bs0, LiveIns, PhiLiveIns, Kills, Lbl2SS, Strategy, AAS1); +aa_blocks([], _LiveIns, _PhiLiveIns, _Kills, Lbl2SS, Strategy, AAS) -> + {Lbl2SS, Strategy, AAS}. aa_is([_I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> ?DP("I: ~p~n", [_I]), diff --git a/lib/compiler/src/beam_ssa_ss.erl b/lib/compiler/src/beam_ssa_ss.erl index 95fdd98c5608..2c628d4d87a8 100644 --- a/lib/compiler/src/beam_ssa_ss.erl +++ b/lib/compiler/src/beam_ssa_ss.erl @@ -45,8 +45,10 @@ new/3, phi/4, prune/3, + prune_by_add/2, set_call_result/4, set_status/3, + size/1, variables/1]). -include("beam_ssa.hrl"). @@ -514,6 +516,58 @@ is_safe_to_prune(V, LiveVars, State) -> end end. +%%% +%%% As prune/3, but doing the pruning by rebuilding the surviving +%%% state from scratch. +%%% +%%% Throws `too_deep` if the depth of sharing state value chains +%%% exceeds SS_DEPTH_LIMIT. +%%% +-spec prune_by_add(sets:set(beam_ssa:b_var()), sharing_state()) + -> sharing_state(). +prune_by_add(LiveVars, State) -> + ?assert_state(State), + ?DP("Pruning to ~p~n", [sets:to_list(LiveVars)]), + ?DP("~s~n", [dump(State)]), + ?DP("Vertices: ~p~n", [beam_digraph:vertices(State)]), + R = prune_by_add([{0,V} || V <- sets:to_list(LiveVars), + beam_digraph:has_vertex(State, V)], + [], new(), State), + ?DP("Pruned result~n~s~n", [dump(R)]), + ?assert_state(R). + +prune_by_add([{Depth0,V}|Wanted], Edges, New0, Old) -> + ?DP("Looking at ~p~n", [V]), + ?DP("Curr:~n~s~n", [dump(New0)]), + ?DP("Wanted: ~p~n", [Wanted]), + case beam_digraph:has_vertex(New0, V) of + true -> + %% This variable is already added. + prune_by_add(Wanted, Edges, New0, Old); + false when Depth0 < ?SS_DEPTH_LIMIT -> + %% This variable has to be kept. Add it to the new graph. + New = add_vertex(New0, V, beam_digraph:vertex(Old, V)), + %% Add all incoming edges to this node. + InEdges = beam_digraph:in_edges(Old, V), + Depth = Depth0 + 1, + InNodes = [{Depth, From} || {From,_,_} <- InEdges], + prune_by_add(InNodes ++ Wanted, InEdges ++ Edges, New, Old); + false -> + %% We're in too deep, give up. This case will probably + %% never be hit as it would require a previous prune/3 + %% application which doesn't hit the depth limit and for + %% it to remove more than half of the nodes to trigger the + %% use of prune_by_add/2, and in a later iteration trigger + %% the depth limit. As it cannot be definitely ruled out, + %% take the hit against the coverage statistics, as the + %% handling code in beam_ssa_alias is tested. + throw(too_deep) + end; +prune_by_add([], Edges, New0, _Old) -> + foldl(fun({Src,Dst,Lbl}, New) -> + add_edge(New, Src, Dst, Lbl) + end, New0, Edges). + -spec set_call_result(beam_ssa:b_var(), call_in_arg_status(), sharing_state(), non_neg_integer()) -> {sharing_state(),non_neg_integer()}. @@ -598,6 +652,10 @@ get_alias_edges(V, State) -> end], EmbedEdges ++ OutEdges. +-spec size(sharing_state()) -> non_neg_integer(). +size(State) -> + beam_digraph:no_vertices(State). + -spec variables(sharing_state()) -> [beam_ssa:b_var()]. variables(State) -> [V || {V,_Lbl} <- beam_digraph:vertices(State)].