Skip to content

Commit

Permalink
erts: Optimize atom_to_binary/1
Browse files Browse the repository at this point in the history
When an atom is created, we now create a binary literal of it, so that atom_to_binary returns the pre-allocated binary literal instead of a newly converted one.
  • Loading branch information
lucioleKi committed Jul 16, 2024
1 parent e927b0b commit d1fca18
Show file tree
Hide file tree
Showing 18 changed files with 155 additions and 139 deletions.
132 changes: 58 additions & 74 deletions erts/emulator/beam/atom.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "global.h"
#include "hash.h"
#include "atom.h"
#include "erl_global_literals.h"


#define ATOM_SIZE 3000
Expand All @@ -48,19 +49,6 @@ static erts_rwmtx_t atom_table_lock;
static erts_atomic_t atom_put_ops;
#endif

/* Functions for allocating space for the ext of atoms. We do not
* use malloc for each atom to prevent excessive memory fragmentation
*/

typedef struct _atom_text {
struct _atom_text* next;
unsigned char text[ATOM_TEXT_SIZE];
} AtomText;

static AtomText* text_list; /* List of text buffers */
static byte *atom_text_pos;
static byte *atom_text_end;
static Uint reserved_atom_space; /* Total amount of atom text space */
static Uint atom_space; /* Amount of atom text space used */

/*
Expand All @@ -81,44 +69,8 @@ void atom_info(fmtfn_t to, void *to_arg)
atom_read_unlock();
}

/*
* Allocate an atom text segment.
*/
static void
more_atom_space(void)
{
AtomText* ptr;

ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));

ptr->next = text_list;
text_list = ptr;

atom_text_pos = ptr->text;
atom_text_end = atom_text_pos + ATOM_TEXT_SIZE;
reserved_atom_space += sizeof(AtomText);

VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
}

/*
* Allocate string space within an atom text segment.
*/

static byte*
atom_text_alloc(int bytes)
{
byte *res;

ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
if (atom_text_pos + bytes >= atom_text_end) {
more_atom_space();
}
res = atom_text_pos;
atom_text_pos += bytes;
atom_space += bytes;
return res;
}

/*
* Calculate atom hash value (using the hash algorithm
Expand All @@ -128,7 +80,7 @@ atom_text_alloc(int bytes)
static HashValue
atom_hash(Atom* obj)
{
byte* p = obj->name;
byte* p = obj->u.name;
int len = obj->len;
HashValue h = 0, g;
byte v;
Expand All @@ -150,12 +102,23 @@ atom_hash(Atom* obj)
return h;
}

const byte *erts_atom_get_name(const Atom *atom)
{
byte *name;
Uint size;
Uint offset;
ERTS_GET_BITSTRING(atom->u.bin, name, offset, size);
ASSERT(offset == 0 && (size % 8) == 0);
(void) size;
(void) offset;
return name;
}

static int
atom_cmp(Atom* tmpl, Atom* obj)
{
if (tmpl->len == obj->len &&
sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0)
sys_memcmp(tmpl->u.name, erts_atom_get_name(obj), tmpl->len) == 0)
return 0;
return 1;
}
Expand All @@ -164,13 +127,39 @@ atom_cmp(Atom* tmpl, Atom* obj)
static Atom*
atom_alloc(Atom* tmpl)
{
Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
Atom *obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));

{
Eterm *hp;
Uint heap_size = 0;
ErtsHeapFactory factory;
ErlOffHeap oh;
struct erl_off_heap_header **literal_ohp;

if (tmpl->len <= ERL_ONHEAP_BINARY_LIMIT) {
heap_size = heap_bits_size(NBITS(tmpl->len));
} else {
heap_size = ERL_REFC_BITS_SIZE;
}

obj->name = atom_text_alloc(tmpl->len);
sys_memcpy(obj->name, tmpl->name, tmpl->len);
hp = erts_global_literal_allocate(heap_size, &literal_ohp);
ERTS_INIT_OFF_HEAP(&oh);
oh.first = *literal_ohp;

erts_factory_static_init(&factory, hp, heap_size, &oh);
*literal_ohp = oh.first;
obj->u.bin = erts_hfact_new_binary_from_data(&factory,
0,
tmpl->len,
tmpl->u.name);
erts_global_literal_register(&obj->u.bin, hp, heap_size);
}

obj->len = tmpl->len;
obj->latin1_chars = tmpl->latin1_chars;
obj->slot.index = -1;
atom_space += tmpl->len;


/*
* Precompute ordinal value of first 3 bytes + 7 bits.
Expand All @@ -186,7 +175,7 @@ atom_alloc(Atom* tmpl)

j = (tmpl->len < 4) ? tmpl->len : 4;
for(i = 0; i < j; ++i)
c[i] = tmpl->name[i];
c[i] = tmpl->u.name[i];
for(; i < 4; ++i)
c[i] = '\0';
obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
Expand Down Expand Up @@ -293,7 +282,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
}

a.len = tlen;
a.name = (byte *) text;
a.u.name = (byte *) text;
atom_read_lock();
aix = index_get(&erts_atom_table, (void*) &a);
atom_read_unlock();
Expand Down Expand Up @@ -333,7 +322,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)

a.len = tlen;
a.latin1_chars = (Sint16) no_latin1_chars;
a.name = (byte *) text;
a.u.name = (byte *) text;
atom_write_lock();
aix = index_put(&erts_atom_table, (void*) &a);
atom_write_unlock();
Expand Down Expand Up @@ -400,7 +389,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)

latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);

a.name = (byte*)name;
a.u.name = (byte*)name;
a.len = (Sint16)len;
break;
case ERTS_ATOM_ENC_7BIT_ASCII:
Expand All @@ -415,7 +404,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
}

a.len = (Sint16)len;
a.name = (byte*)name;
a.u.name = (byte*)name;
break;
case ERTS_ATOM_ENC_UTF8:
if (len > MAX_ATOM_SZ_LIMIT) {
Expand All @@ -427,7 +416,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
* name will fail. */

a.len = (Sint16)len;
a.name = (byte*)name;
a.u.name = (byte*)name;
break;
}

Expand All @@ -446,7 +435,7 @@ erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
if (lock)
atom_read_lock();
if (reserved)
*reserved = reserved_atom_space;
*reserved = atom_space;
if (used)
*used = atom_space;
if (lock)
Expand Down Expand Up @@ -479,33 +468,28 @@ init_atom_table(void)
f.meta_free = (HMFREE_FUN) erts_free;
f.meta_print = (HMPRINT_FUN) erts_print;

atom_text_pos = NULL;
atom_text_end = NULL;
reserved_atom_space = 0;
atom_space = 0;
text_list = NULL;

erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
"atom_tab", ATOM_SIZE, erts_atom_table_size, f);
more_atom_space();

/* Ordinary atoms */
/* Ordinary atoms. a is a template for creating an entry in the atom table */
for (i = 0; erl_atom_names[i] != 0; i++) {
int ix;
a.len = sys_strlen(erl_atom_names[i]);
a.latin1_chars = a.len;
a.name = (byte*)erl_atom_names[i];
a.u.name = (byte*)erl_atom_names[i];
a.slot.index = i;


#ifdef DEBUG
/* Verify 7-bit ascii */
for (ix = 0; ix < a.len; ix++) {
ASSERT((a.name[ix] & 0x80) == 0);
ASSERT((a.u.name[ix] & 0x80) == 0);
}
#endif
ix = index_put(&erts_atom_table, (void*) &a);
atom_text_pos -= a.len;
atom_space -= a.len;
atom_tab(ix)->name = (byte*)erl_atom_names[i];
(void) ix;
/* Assert that the entry in the atom table is not a template */
ASSERT(erts_atom_get_name(atom_tab(ix)));
}

}
Expand Down
11 changes: 8 additions & 3 deletions erts/emulator/beam/atom.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ typedef struct atom {
Sint16 len; /* length of atom name (UTF-8 encoded) */
Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */
int ord0; /* ordinal value of first 3 bytes + 7 bits */
byte* name; /* name of atom */
union{
byte* name; /* name of atom, used by templates */
Eterm bin; /* name of atom, used when atom is in table*/
} u;
} Atom;

extern IndexTable erts_atom_table;
Expand All @@ -59,6 +62,8 @@ ERTS_GLB_INLINE Atom* atom_tab(Uint i);
ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term);
ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1);

const byte *erts_atom_get_name(const Atom *atom);

#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE Atom*
atom_tab(Uint i)
Expand All @@ -73,7 +78,7 @@ ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term)
return 0;
a = atom_tab(atom_val(term));
return (len == (size_t) a->len
&& sys_memcmp((void *) a->name, (void *) text, len) == 0);
&& sys_memcmp((void *) erts_atom_get_name(a), (void *) text, len) == 0);
}

ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
Expand All @@ -87,7 +92,7 @@ ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
return 0;
a = atom_tab(atom_val(term));
len = a->len;
aname = a->name;
aname = erts_atom_get_name(a);
if (is_latin1) {
for (i = 0; i < len; s++) {
if (aname[i] < 0x80) {
Expand Down
4 changes: 2 additions & 2 deletions erts/emulator/beam/bif.c
Original file line number Diff line number Diff line change
Expand Up @@ -2929,10 +2929,10 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1)
BIF_RET(NIL); /* the empty atom */

ares =
erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL);
ASSERT(ares == ERTS_UTF8_OK); (void)ares;

res = erts_utf8_to_list(BIF_P, num_chars, ap->name, ap->len, ap->len,
res = erts_utf8_to_list(BIF_P, num_chars, erts_atom_get_name(ap), ap->len, ap->len,
&num_built, &num_eaten, NIL);
ASSERT(num_built == num_chars);
ASSERT(num_eaten == ap->len);
Expand Down
2 changes: 1 addition & 1 deletion erts/emulator/beam/dist.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ int is_node_name_atom(Eterm a)
return 0;
i = atom_val(a);
ASSERT((i > 0) && (i < atom_table_size()) && (atom_tab(i) != NULL));
return is_node_name((char*)atom_tab(i)->name, atom_tab(i)->len);
return is_node_name((char*)erts_atom_get_name(atom_tab(i)), atom_tab(i)->len);
}

static void
Expand Down
4 changes: 2 additions & 2 deletions erts/emulator/beam/erl_bif_ddll.c
Original file line number Diff line number Diff line change
Expand Up @@ -1725,7 +1725,7 @@ static int errdesc_to_code(Eterm errdesc, int *code /* out */)
for (i = 0; errcode_tab[i].atm != NULL; ++i) {
int len = sys_strlen(errcode_tab[i].atm);
if (len == ap->len &&
!sys_strncmp(errcode_tab[i].atm,(char *) ap->name,len)) {
!sys_strncmp(errcode_tab[i].atm,(char *) erts_atom_get_name(ap),len)) {
*code = errcode_tab[i].code;
return 0;
}
Expand Down Expand Up @@ -1799,7 +1799,7 @@ static char *pick_list_or_atom(Eterm name_term)
goto error;
}
name = erts_alloc(ERTS_ALC_T_DDLL_TMP_BUF, ap->len + 1);
sys_memcpy(name,ap->name,ap->len);
sys_memcpy(name,erts_atom_get_name(ap),ap->len);
name[ap->len] = '\0';
} else {
if (erts_iolist_size(name_term, &name_len)) {
Expand Down
2 changes: 1 addition & 1 deletion erts/emulator/beam/erl_bif_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -2703,7 +2703,7 @@ c_compiler_used(Eterm **hpp, Uint *szp)
static int is_snif_term(Eterm module_atom) {
int i;
Atom *a = atom_tab(atom_val(module_atom));
char *aname = (char *) a->name;
char *aname = (char *) erts_atom_get_name(a);

/* if a->name has a '.' then the bif (snif) is bogus i.e a package */
for (i = 0; i < a->len; i++) {
Expand Down
2 changes: 1 addition & 1 deletion erts/emulator/beam/erl_bif_re.c
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
}
}
ASSERT(tmpb != NULL);
sys_memcpy(tmpb,ap->name,ap->len);
sys_memcpy(tmpb,erts_atom_get_name(ap),ap->len);
tmpb[ap->len] = '\0';
} else {
ErlDrvSizeT slen;
Expand Down
4 changes: 2 additions & 2 deletions erts/emulator/beam/erl_db_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -3851,13 +3851,13 @@ bool db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b)

int db_is_variable(Eterm obj)
{
byte *b;
const byte *b;
int n;
int N;

if (is_not_atom(obj))
return -1;
b = atom_tab(atom_val(obj))->name;
b = erts_atom_get_name(atom_tab(atom_val(obj)));
if ((n = atom_tab(atom_val(obj))->len) < 2)
return -1;
if (*b++ != '$')
Expand Down
10 changes: 5 additions & 5 deletions erts/emulator/beam/erl_nif.c
Original file line number Diff line number Diff line change
Expand Up @@ -1799,9 +1799,9 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len,
return 0;
}
if (ap->latin1_chars == ap->len) {
sys_memcpy(buf, ap->name, ap->len);
sys_memcpy(buf, erts_atom_get_name(ap), ap->len);
} else {
int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len);
int dlen = erts_utf8_to_latin1((byte*)buf, erts_atom_get_name(ap), ap->len);
ASSERT(dlen == ap->latin1_chars); (void)dlen;
}
buf[ap->latin1_chars] = '\0';
Expand All @@ -1810,7 +1810,7 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len,
if (ap->len >= len) {
return 0;
}
sys_memcpy(buf, ap->name, ap->len);
sys_memcpy(buf, erts_atom_get_name(ap), ap->len);
buf[ap->len] = '\0';
return ap->len + 1;
}
Expand Down Expand Up @@ -4480,8 +4480,8 @@ void erts_print_nif_taints(fmtfn_t to, void* to_arg)

t = (struct tainted_module_t*) erts_atomic_read_nob(&first_taint);
for ( ; t; t = t->next) {
const Atom* atom = atom_tab(atom_val(t->module_atom));
erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, atom->name);
Atom* atom = atom_tab(atom_val(t->module_atom));
erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, erts_atom_get_name(atom));
delim = ",";
}
erts_cbprintf(to,to_arg,"\n");
Expand Down
Loading

0 comments on commit d1fca18

Please sign in to comment.