Skip to content

Commit

Permalink
Don't compress the literal chunk in BEAM files
Browse files Browse the repository at this point in the history
See erlang#8967 and erlang#8940 for a discussion about why this is useful.

This commit also makes it possible to read out the literal chunk
decoded like so:

    beam_lib:chunks(Beam, [literals])

Closes erlang#8967
  • Loading branch information
bjorng committed Oct 25, 2024
1 parent 0b6fe48 commit dfd237c
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 35 deletions.
16 changes: 11 additions & 5 deletions erts/emulator/beam/beam_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ static void free_literal_fragment(ErlHeapFragment *fragment) {
}

static int parse_decompressed_literals(BeamFile *beam,
byte *data,
const byte *data,
uLongf size) {
BeamFile_LiteralTable *literals;
BeamFile_LiteralEntry *entries;
Expand Down Expand Up @@ -796,6 +796,7 @@ static int parse_literal_chunk(BeamFile *beam, IFF_Chunk *chunk) {
uLongf uncompressed_size_z;
byte *uncompressed_data;
int success;
int zerr;

beamreader_init(chunk->data, chunk->size, &reader);
compressed_size = chunk->size;
Expand All @@ -807,13 +808,18 @@ static int parse_literal_chunk(BeamFile *beam, IFF_Chunk *chunk) {
uncompressed_data = erts_alloc(ERTS_ALC_T_TMP, uncompressed_size);
success = 0;

if (erl_zlib_uncompress(uncompressed_data,
&uncompressed_size_z,
reader.head,
compressed_size) == Z_OK) {
zerr = erl_zlib_uncompress(uncompressed_data,
&uncompressed_size_z,
reader.head,
compressed_size);
if (zerr == Z_OK) {
success = parse_decompressed_literals(beam,
uncompressed_data,
uncompressed_size_z);
} else {
success = parse_decompressed_literals(beam,
reader.head,
uncompressed_size_z);
}

erts_free(ERTS_ALC_T_TMP, (void*)uncompressed_data);
Expand Down
26 changes: 18 additions & 8 deletions lib/compiler/src/beam_asm.erl
Original file line number Diff line number Diff line change
Expand Up @@ -174,14 +174,7 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, Com
%% Create the literal table chunk. It is important not to build an empty chunk,
%% as that would change the MD5.

LiteralChunk = case beam_dict:literal_table(Dict) of
{0,[]} -> [];
{NumLiterals,LitTab0} ->
LitTab1 = [<<NumLiterals:32>>,LitTab0],
LitTab = zlib:compress(LitTab1),
chunk(<<"LitT">>, <<(iolist_size(LitTab1)):32>>,
LitTab)
end,
LiteralChunk = build_literal_chunk(CompilerOpts, Dict),

%% Create the line chunk.
LineChunk = chunk(<<"Line">>, build_line_table(Dict, CompilerOpts)),
Expand Down Expand Up @@ -334,6 +327,23 @@ encode_line_items([{F,L}|T], _) ->
[encode(?tag_a, F),encode(?tag_i, L)|encode_line_items(T, F)];
encode_line_items([], _) -> [].

%% Build the literal table chunk ("LitT") from the literals collected
%% in Dict. Returns [] when there are no literals, so that no empty
%% chunk is emitted.
%%
%% The chunk header always holds the size of the uncompressed table.
%% The table itself is only zlib-compressed when the
%% compressed_literals option is present (the format of Erlang/OTP 27
%% and earlier); otherwise it is stored as is (Erlang/OTP 28 and later).
build_literal_chunk(Options, Dict) ->
    case beam_dict:literal_table(Dict) of
        {0,[]} ->
            [];
        {Count,Entries} ->
            Table = [<<Count:32>>,Entries],
            Payload = literal_payload(Table, Options),
            chunk(<<"LitT">>, <<(iolist_size(Table)):32>>, Payload)
    end.

%% Choose the on-disk representation of the literal table.
literal_payload(Table, Options) ->
    case member(compressed_literals, Options) of
        true ->
            %% Erlang/OTP 27 and earlier.
            zlib:compress(Table);
        false ->
            %% Erlang/OTP 28 and later.
            Table
    end.
%%
%% If the attributes contains no 'vsn' attribute, we'll insert one
%% with an MD5 "checksum" calculated on the code as its value.
Expand Down
22 changes: 5 additions & 17 deletions lib/compiler/src/beam_disasm.erl
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,11 @@ process_chunks(F) ->
Atoms = mk_atoms(AtomsList),
LambdaBin = optional_chunk(F, "FunT"),
Lambdas = beam_disasm_lambdas(LambdaBin, Atoms),
LiteralBin = optional_chunk(F, "LitT"),
Literals = beam_disasm_literals(LiteralBin),
Literals1 = case optional_chunk(F, literals) of
none -> [];
Literals0 -> Literals0
end,
Literals = gb_trees:from_orddict(Literals1),
TypeBin = optional_chunk(F, "Type"),
Types = beam_disasm_types(TypeBin),
Code = beam_disasm_code(CodeBin, Atoms, mk_imports(ImportsList),
Expand Down Expand Up @@ -244,21 +247,6 @@ disasm_lambdas(<<F:32,A:32,Lbl:32,Index:32,NumFree:32,OldUniq:32,More/binary>>,
[{OldIndex,Info}|disasm_lambdas(More, Atoms, OldIndex+1)];
disasm_lambdas(<<>>, _, _) -> [].

%%-----------------------------------------------------------------------
%% Disassembles the literal table (constant pool) of a BEAM file.
%%-----------------------------------------------------------------------

-spec beam_disasm_literals('none' | binary()) -> literals().

beam_disasm_literals(none) -> none;
beam_disasm_literals(<<_:32,Compressed/binary>>) ->
<<_:32,Tab/binary>> = zlib:uncompress(Compressed),
gb_trees:from_orddict(disasm_literals(Tab, 0)).

disasm_literals(<<Sz:32,Ext:Sz/binary,T/binary>>, Index) ->
[{Index,binary_to_term(Ext)}|disasm_literals(T, Index+1)];
disasm_literals(<<>>, _) -> [].

%%-----------------------------------------------------------------------
%% Disassembles the type table of a BEAM file.
%%-----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion lib/compiler/src/compile.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ expand_opt(r25, Os) ->
expand_opt(r26, Os) ->
[no_bsm_opt | expand_opt(r27, Os)];
expand_opt(r27, Os) ->
[no_long_atoms | Os];
[no_long_atoms, compressed_literals | Os];
expand_opt({debug_info_key,_}=O, Os) ->
[encrypt_debug_info,O|Os];
expand_opt(no_type_opt=O, Os) ->
Expand Down
38 changes: 36 additions & 2 deletions lib/stdlib/src/beam_lib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ follows:
- `indexed_imports ("ImpT")`
- `labeled_exports ("ExpT")`
- `labeled_locals ("LocT")`
- `literals ("LitT")`
- `locals ("LocT")`
- `documentation ("Docs")`
Expand Down Expand Up @@ -231,12 +232,14 @@ computed from the `debug_info` chunk.
""".
-type chunkid() :: nonempty_string(). % approximation of the strings below
%% "Abst" | "Dbgi" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom" | "AtU8" | "Docs"
%% | "LitT"
-type chunkname() :: 'abstract_code' | 'debug_info'
| 'attributes' | 'compile_info'
| 'exports' | 'labeled_exports'
| 'imports' | 'indexed_imports'
| 'locals' | 'labeled_locals'
| 'atoms' | 'documentation'.
| 'atoms' | 'documentation'
| 'literals'.
-type chunkref() :: chunkname() | chunkid().
-type attrib_entry() :: {Attribute :: atom(), [AttributeValue :: term()]}.
Expand All @@ -246,6 +249,8 @@ computed from the `debug_info` chunk.
-doc "[EEP-48 documentation format](`e:kernel:eep48_chapter.md#the-docs-format`)".
-type docs() :: #docs_v1{}.

%% The decoded literal table: a list with one {Index, Term} pair per
%% literal (the value returned for the 'literals' chunk is a list,
%% not a single pair).
-type literals() :: [{index(), term()}].

-doc """
The list of attributes is sorted on `Attribute` (in `t:attrib_entry/0`) and each
attribute name occurs once in the list. The attribute values occur in the same
Expand All @@ -263,7 +268,8 @@ order as in the file. The lists of functions are also sorted.
| {'locals', [{atom(), arity()}]}
| {'labeled_locals', [labeled_entry()]}
| {'atoms', [{integer(), atom()}]}
| {'documentation', docs()}.
| {'documentation', docs()}
| {'literals', literals()}.

%% Error reasons
-type info_rsn() :: {'chunk_too_big', file:filename(),
Expand Down Expand Up @@ -1123,6 +1129,14 @@ chunk_to_data(atoms=Id, _Chunk, _File, Cs, AtomTable0, _Mod) ->
AtomTable = ensure_atoms(AtomTable0, Cs),
Atoms = ets:tab2list(AtomTable),
{AtomTable, {Id, lists:sort(Atoms)}};
chunk_to_data(literals=Id, Chunk, File, _Cs, AtomTable, _Mod) ->
try extract_literals(Chunk) of
Literals ->
{AtomTable, {Id, Literals}}
catch
_:_ ->
error({invalid_chunk, File, chunk_name_to_id(Id, File)})
end;
chunk_to_data(ChunkName, Chunk, File,
Cs, AtomTable, _Mod) when is_atom(ChunkName) ->
case catch symbols(Chunk, AtomTable, Cs, ChunkName) of
Expand All @@ -1146,6 +1160,7 @@ chunk_name_to_id(abstract_code, _) -> "Abst";
chunk_name_to_id(debug_info, _) -> "Dbgi";
chunk_name_to_id(compile_info, _) -> "CInf";
chunk_name_to_id(documentation, _) -> "Docs";
chunk_name_to_id(literals, _) -> "LitT";
chunk_name_to_id(Other, File) ->
error({unknown_chunk, File, Other}).

Expand Down Expand Up @@ -1245,6 +1260,25 @@ decode_arg_val(<<High:3,0:1,1:1, _Tag:3, Low, Code0/binary>>) ->
N = (High bsl 8) bor Low,
{N, Code0}.

%% Decode the contents of a "LitT" chunk into a list of
%% {Index, Term} pairs, with indices counting up from 0.
%%
%% The chunk starts with a 32-bit size of the uncompressed literal
%% table, followed by the table itself (zlib-compressed or not). The
%% table holds a 32-bit literal count followed by that many
%% size-prefixed terms in the external term format.
%%
%% Any inconsistency raises an exception.
extract_literals(Chunk0) ->
    <<DeclaredSize:32, Payload/binary>> = Chunk0,
    Table = inflate_literal_table(Payload),
    <<Count:32, Encoded/binary>> = Table,
    %% Sanity check: the header must state the size of the plain table.
    DeclaredSize = byte_size(Table),
    Terms = [binary_to_term(Ext) || <<Sz:32, Ext:Sz/binary>> <:= Encoded],
    %% Sanity check: the table must hold as many terms as it claims.
    Count = length(Terms),
    lists:enumerate(0, Terms).

%% Literals are compressed in Erlang/OTP 27 and earlier, and stored
%% uncompressed in Erlang/OTP 28 and later. Try to uncompress first
%% and fall back to using the data as is.
inflate_literal_table(Payload) ->
    try
        zlib:uncompress(Payload)
    catch
        error:_ ->
            Payload
    end.

%%% Utils.

-record(bb, {pos = 0 :: integer(),
Expand Down
35 changes: 33 additions & 2 deletions lib/stdlib/test/beam_lib_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
init_per_group/2,end_per_group/2,
normal/1, error/1, cmp/1, cmp_literals/1, strip/1, strip_add_chunks/1, otp_6711/1,
building/1, md5/1, encrypted_abstr/1, encrypted_abstr_file/1,
missing_debug_info_backend/1]).
missing_debug_info_backend/1, literals/1
]).
-export([test_makedep_abstract_code/1]).

-export([init_per_testcase/2, end_per_testcase/2]).
Expand All @@ -48,7 +49,8 @@ suite() ->
all() ->
[error, normal, cmp, cmp_literals, strip, strip_add_chunks, otp_6711,
building, md5, encrypted_abstr, encrypted_abstr_file,
missing_debug_info_backend, test_makedep_abstract_code
missing_debug_info_backend, test_makedep_abstract_code,
literals
].

groups() ->
Expand Down Expand Up @@ -874,6 +876,29 @@ missing_debug_info_backend(Conf) ->

ok.

%% Run the literal chunk check once with default compiler options and
%% once with the r27 option.
literals(Conf) ->
    lists:foreach(fun(Options) -> do_literals(Conf, Options) end,
                  [[], [r27]]),
    ok.

%% Compile a module containing a single literal using the given extra
%% compiler options, then check that beam_lib:chunks/2 returns the
%% decoded literal table. The generated files are removed afterwards.
do_literals(Conf, Options) ->
    PrivDir = ?privdir,
    Base = filename:join(PrivDir, "simple"),
    ErlFile = Base ++ ".erl",
    Beam = Base ++ ".beam",
    simple_file(ErlFile, simple, literals),

    CompileOpts = [{outdir,PrivDir},report|Options],
    {ok,simple} = compile:file(ErlFile, CompileOpts),

    Expected = [{literals,[{0,{literal,tuple}}]}],
    {ok, {simple, Expected}} = beam_lib:chunks(Beam, [literals]),

    ok = file:delete(ErlFile),
    ok = file:delete(Beam),

    ok.

compare_chunks(File1, File2, ChunkIds) ->
{ok, {_, Chunks1}} = beam_lib:chunks(File1, ChunkIds),
{ok, {_, Chunks2}} = beam_lib:chunks(File2, ChunkIds),
Expand Down Expand Up @@ -979,6 +1004,12 @@ simple_file(File, Module, lines) ->
"t(A) ->\n"
" A+1.\n"]),
ok = file:write_file(File, B);
simple_file(File, Module, literals) ->
B = list_to_binary(["-module(", atom_to_list(Module), "). "
"-export([t/0]). "
"t() -> "
" {literal, tuple}. "]),
ok = file:write_file(File, B);
simple_file(File, Module, F) ->
B = list_to_binary(["-module(", atom_to_list(Module), "). "
"-export([t/0]). "
Expand Down

0 comments on commit dfd237c

Please sign in to comment.