Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: destroy spawn of satan type punning in ETF parser #894

Merged
merged 2 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions include/dpp/etf.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,6 @@ enum etf_token_type : uint8_t {
ett_atom_utf8_small = 'w'
};

/**
* @brief A horrible structure used within the ETF parser to convert uint64_t to double and back.
* This is horrible, but it is the official way erlang term format does this, so we can't really
* mess with it much.
*/
union type_punner {
/**
* @brief binary integer value
*/
uint64_t ui64;
/**
* @brief double floating point value
*/
double df;
};

/**
* @brief Represents a buffer of bytes being encoded into ETF
*/
Expand Down
57 changes: 31 additions & 26 deletions src/dpp/etf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,39 +55,39 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
pk->length += l;
}

void etf_parser::append_version(etf_buffer *b) {
void etf_parser::append_version(etf_buffer *b) {
static unsigned char buf[1] = {FORMAT_VERSION};
buffer_write(b, (const char *)buf, 1);
}

void etf_parser::append_nil(etf_buffer *b) {
void etf_parser::append_nil(etf_buffer *b) {
static unsigned char buf[5] = {ett_atom_small, 3, 'n', 'i', 'l'};
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_false(etf_buffer *b) {
void etf_parser::append_false(etf_buffer *b) {
static unsigned char buf[7] = {ett_atom_small, 5, 'f', 'a', 'l', 's', 'e'};
buffer_write(b, (const char *)buf, 7);
}

void etf_parser::append_true(etf_buffer *b) {
void etf_parser::append_true(etf_buffer *b) {
static unsigned char buf[6] = {ett_atom_small, 4, 't', 'r', 'u', 'e'};
buffer_write(b, (const char *)buf, 6);
}

void etf_parser::append_small_integer(etf_buffer *b, unsigned char d) {
void etf_parser::append_small_integer(etf_buffer *b, unsigned char d) {
unsigned char buf[2] = {ett_smallint, d};
buffer_write(b, (const char *)buf, 2);
}

void etf_parser::append_integer(etf_buffer *b, int32_t d) {
void etf_parser::append_integer(etf_buffer *b, int32_t d) {
unsigned char buf[5];
buf[0] = ett_integer;
store_32_bits(buf + 1, d);
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_unsigned_long_long(etf_buffer *b, unsigned long long d) {
void etf_parser::append_unsigned_long_long(etf_buffer *b, unsigned long long d) {
unsigned char buf[1 + 2 + sizeof(unsigned long long)];
buf[0] = ett_bigint_small;

Expand All @@ -103,7 +103,7 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
buffer_write(b, (const char *)buf, 1 + 2 + bytes_enc);
}

void etf_parser::append_long_long(etf_buffer *b, long long d) {
void etf_parser::append_long_long(etf_buffer *b, long long d) {
unsigned char buf[1 + 2 + sizeof(unsigned long long)];
buf[0] = ett_bigint_small;
buf[2] = d < 0 ? 1 : 0;
Expand All @@ -118,16 +118,16 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
buffer_write(b, (const char *)buf, 1 + 2 + bytes_enc);
}

void etf_parser::append_double(etf_buffer *b, double f) {
void etf_parser::append_double(etf_buffer *b, double f) {
unsigned char buf[1 + 8] = {0};
uint64_t val_64_out{0};
buf[0] = ett_new_float;
type_punner p;
p.df = f;
store_64_bits(buf + 1, p.ui64);
memcpy(&val_64_out, &f, sizeof(double));
store_64_bits(buf + 1, val_64_out);
buffer_write(b, (const char *)buf, 1 + 8);
}

void etf_parser::append_atom(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_atom(etf_buffer *b, const char *bytes, size_t size) {
if (size < 255) {
unsigned char buf[2] = {ett_atom_small, (unsigned char)size};
buffer_write(b, (const char *)buf, 2);
Expand All @@ -146,7 +146,7 @@ void etf_parser::buffer_write(etf_buffer *pk, const char *bytes, size_t l) {
}
}

void etf_parser::append_atom_utf8(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_atom_utf8(etf_buffer *b, const char *bytes, size_t size) {
if (size < 255) {
unsigned char buf[2] = {ett_atom_utf8_small, (unsigned char)size};
buffer_write(b, (const char *)buf, 2);
Expand Down Expand Up @@ -174,7 +174,7 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
buffer_write(b, (const char *)bytes, size);
}

void etf_parser::append_string(etf_buffer *b, const char *bytes, size_t size) {
void etf_parser::append_string(etf_buffer *b, const char *bytes, size_t size) {
unsigned char buf[3];
buf[0] = ett_string;

Expand All @@ -183,7 +183,7 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
buffer_write(b, (const char *)bytes, size);
}

void etf_parser::append_tuple_header(etf_buffer *b, size_t size) {
void etf_parser::append_tuple_header(etf_buffer *b, size_t size) {
if (size < 256) {
unsigned char buf[2];
buf[0] = ett_small_tuple;
Expand All @@ -197,19 +197,19 @@ void etf_parser::append_binary(etf_buffer *b, const char *bytes, size_t size) {
}
}

void etf_parser::append_nil_ext(etf_buffer *b) {
void etf_parser::append_nil_ext(etf_buffer *b) {
static unsigned char buf[1] = {ett_nil};
buffer_write(b, (const char *)buf, 1);
}

void etf_parser::append_list_header(etf_buffer *b, size_t size) {
void etf_parser::append_list_header(etf_buffer *b, size_t size) {
unsigned char buf[5];
buf[0] = ett_list;
store_32_bits(buf + 1, size);
buffer_write(b, (const char *)buf, 5);
}

void etf_parser::append_map_header(etf_buffer *b, size_t size) {
void etf_parser::append_map_header(etf_buffer *b, size_t size) {
unsigned char buf[5];
buf[0] = ett_map;
store_32_bits(buf + 1, size);
Expand Down Expand Up @@ -269,28 +269,33 @@ const char* etf_parser::read_string(uint32_t length) {
return (const char*)str;
}

static const char* atom_null = "null";
static const char* atom_true = "true";
static const char* atom_false = "false";

json etf_parser::process_atom(const char* atom, uint16_t length) {
json j;

if (atom == NULL) {
return j;
}

/**
* WARNING: PERFORMANCE HOTSPOT
* Valgrind cachegrind identified this as a performance hotspot in DPP when using ETF. This gets called a LOT,
* and the difference here between comparing the simple constant types here where the lengths are known in this
* way, compared to using memcpy or strncpy four times, is absolutely huge (factors of ten). Considering this
* can be called hundreds of times a second on a busy bot, the performance of this function should not be
* underestimated. Think carefully before 'tidying' this function further!
*
*/
if (length >= 3 && length <= 5) {
if (length == 4 && *((uint32_t*)atom) == *((uint32_t*)atom_null)) { // "null"
if (length == 4 && atom[0] == 'n' && atom[1] == 'u' && atom[2] == 'l' && atom[3] == 'l') { // "null"
braindigitalis marked this conversation as resolved.
Show resolved Hide resolved
return j;
}
else if(length == 4 && *((uint32_t*)atom) == *((uint32_t*)atom_true)) { // "true"
else if (length == 4 && atom[0] == 't' && atom[1] == 'r' && atom[2] == 'u' && atom[3] == 'e') { // "true"
return true;
}
else if (length == 3 && atom[0] == 'n' && atom[1] == 'i' && atom[2] == 'l') { // "nil"
return j;
}
else if (length == 5 && *((uint32_t*)atom) == *((uint32_t*)atom_false) && atom[4] == 'e') { // "fals","e"
else if (length == 5 && atom[0] == 'f' && atom[1] == 'a' && atom[2] == 'l' && atom[3] == 's' && atom[4] == 'e') { // "fals","e"
return false;
}
}
Expand Down