Skip to content

Commit

Permalink
ballet: prefix blake3 functions with fd
Browse files Browse the repository at this point in the history
  • Loading branch information
anwayde authored and mmcgee-jump committed Dec 12, 2024
1 parent 73ed876 commit eef2f7e
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 285 deletions.
92 changes: 46 additions & 46 deletions src/ballet/blake3/blake3.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ INLINE output_t make_output(const uint32_t input_cv[8],
INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
uint32_t cv_words[8];
fd_memcpy(cv_words, self->input_cv, 32);
blake3_compress_in_place(cv_words, self->block, self->block_len,
self->counter, self->flags);
fd_blake3_compress_in_place(cv_words, self->block, self->block_len,
self->counter, self->flags);
store_cv_words(cv, cv_words);
}

Expand All @@ -98,8 +98,8 @@ INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
size_t offset_within_block = seek % 64;
uint8_t wide_buf[64];
while (out_len > 0) {
blake3_compress_xof(self->input_cv, self->block, self->block_len,
output_block_counter, self->flags | ROOT, wide_buf);
fd_blake3_compress_xof(self->input_cv, self->block, self->block_len,
output_block_counter, self->flags | ROOT, wide_buf);
size_t available_bytes = 64 - offset_within_block;
size_t fd_memcpy_len;
if (out_len > available_bytes) {
Expand All @@ -122,7 +122,7 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
input += take;
input_len -= take;
if (input_len > 0) {
blake3_compress_in_place(
fd_blake3_compress_in_place(
self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
#pragma GCC diagnostic push
Expand All @@ -135,9 +135,9 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
}

while (input_len > BLAKE3_BLOCK_LEN) {
blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
fd_blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
self->blocks_compressed += 1;
Expand Down Expand Up @@ -195,9 +195,9 @@ INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
chunks_array_len += 1;
}

blake3_hash_many(chunks_array, chunks_array_len,
BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
true, flags, CHUNK_START, CHUNK_END, out);
fd_blake3_hash_many(chunks_array, chunks_array_len,
BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
true, flags, CHUNK_START, CHUNK_END, out);

// Hash the remaining partial chunk, if there is one. Note that the empty
// chunk (meaning the empty message) is a different codepath.
Expand Down Expand Up @@ -238,12 +238,12 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
parents_array_len += 1;
}

blake3_hash_many(parents_array, parents_array_len, 1, key,
0, // Parents always use counter 0.
false, flags | PARENT,
0, // Parents have no start flags.
0, // Parents have no end flags.
out);
fd_blake3_hash_many(parents_array, parents_array_len, 1, key,
0, // Parents always use counter 0.
false, flags | PARENT,
0, // Parents have no start flags.
0, // Parents have no end flags.
out);

// If there's an odd child left over, it becomes an output.
if (num_chaining_values > 2 * parents_array_len) {
Expand Down Expand Up @@ -273,16 +273,16 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
// Why not just have the caller split the input on the first update(), instead
// of implementing this special rule? Because we don't want to limit SIMD or
// multi-threading parallelism for that update().
static size_t blake3_compress_subtree_wide(const uint8_t *input,
size_t input_len,
const uint32_t key[8],
uint64_t chunk_counter,
uint8_t flags, uint8_t *out) {
static size_t fd_blake3_compress_subtree_wide(const uint8_t *input,
size_t input_len,
const uint32_t key[8],
uint64_t chunk_counter,
uint8_t flags, uint8_t *out) {
// Note that the single chunk case does *not* bump the SIMD degree up to 2
// when it is 1. If this implementation adds multi-threading in the future,
// this gives us the option of multi-threading even the 2-chunk case, which
// can help performance on smaller platforms.
if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
if (input_len <= fd_blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
out);
}
Expand All @@ -301,7 +301,7 @@ static size_t blake3_compress_subtree_wide(const uint8_t *input,
// account for the special case of returning 2 outputs when the SIMD degree
// is 1.
uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
size_t degree = blake3_simd_degree();
size_t degree = fd_blake3_simd_degree();
if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
// The special case: We always use a degree of at least two, to make
// sure there are two outputs. Except, as noted above, at the chunk
Expand All @@ -313,9 +313,9 @@ static size_t blake3_compress_subtree_wide(const uint8_t *input,

// Recurse! If this implementation adds multi-threading support in the
// future, this is where it will go.
size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
chunk_counter, flags, cv_array);
size_t right_n = blake3_compress_subtree_wide(
size_t left_n = fd_blake3_compress_subtree_wide(input, left_input_len, key,
chunk_counter, flags, cv_array);
size_t right_n = fd_blake3_compress_subtree_wide(
right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);

// The special case again. If simd_degree=1, then we'll have left_n=1 and
Expand Down Expand Up @@ -350,8 +350,8 @@ INLINE void compress_subtree_to_parent_node(
#endif

uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
chunk_counter, flags, cv_array);
size_t num_cvs = fd_blake3_compress_subtree_wide(input, input_len, key,
chunk_counter, flags, cv_array);
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);

// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
Expand All @@ -378,29 +378,29 @@ INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
self->cv_stack_len = 0;
}

void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
void fd_blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }

void blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]) {
void fd_blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]) {
uint32_t key_words[8];
load_key_words(key, key_words);
hasher_init_base(self, key_words, KEYED_HASH);
}

void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
size_t context_len) {
void fd_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
size_t context_len) {
blake3_hasher context_hasher;
hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);
blake3_hasher_update(&context_hasher, context, context_len);
fd_blake3_hasher_update(&context_hasher, context, context_len);
uint8_t context_key[BLAKE3_KEY_LEN];
blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
fd_blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
uint32_t context_key_words[8];
load_key_words(context_key, context_key_words);
hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);
}

void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
blake3_hasher_init_derive_key_raw(self, context, strlen(context));
void fd_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
fd_blake3_hasher_init_derive_key_raw(self, context, strlen(context));
}

// As described in hasher_push_cv() below, we do "lazy merging", delaying
Expand Down Expand Up @@ -470,8 +470,8 @@ INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
#pragma GCC diagnostic pop
}

void blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len) {
void fd_blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len) {
// Explicitly checking for zero avoids causing UB by passing a null pointer
// to fd_memcpy. This comes up in practice with things like:
// std::vector<uint8_t> v;
Expand Down Expand Up @@ -579,13 +579,13 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
}
}

void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len) {
blake3_hasher_finalize_seek(self, 0, out, out_len);
void fd_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len) {
fd_blake3_hasher_finalize_seek(self, 0, out, out_len);
}

void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
uint8_t *out, size_t out_len) {
void fd_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
uint8_t *out, size_t out_len) {
// Explicitly checking for zero avoids causing UB by passing a null pointer
// to fd_memcpy. This comes up in practice with things like:
// std::vector<uint8_t> v;
Expand Down Expand Up @@ -631,7 +631,7 @@ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
output_root_bytes(&output, seek, out, out_len);
}

void blake3_hasher_reset(blake3_hasher *self) {
void fd_blake3_hasher_reset(blake3_hasher *self) {
chunk_state_reset(&self->chunk, self->key, 0);
self->cv_stack_len = 0;
}
28 changes: 14 additions & 14 deletions src/ballet/blake3/blake3.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,20 @@ typedef struct {
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
} blake3_hasher;

const char *blake3_version(void);
void blake3_hasher_init(blake3_hasher *self);
void blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]);
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
size_t context_len);
void blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len);
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len);
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
uint8_t *out, size_t out_len);
void blake3_hasher_reset(blake3_hasher *self);
const char *fd_blake3_version(void);
void fd_blake3_hasher_init(blake3_hasher *self);
void fd_blake3_hasher_init_keyed(blake3_hasher *self,
const uint8_t key[BLAKE3_KEY_LEN]);
void fd_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
void fd_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
size_t context_len);
void fd_blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len);
void fd_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len);
void fd_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
uint8_t *out, size_t out_len);
void fd_blake3_hasher_reset(blake3_hasher *self);

#ifdef __cplusplus
}
Expand Down
46 changes: 23 additions & 23 deletions src/ballet/blake3/blake3_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
}

static
void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
void fd_blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
__m256i h_vecs[8] = {
set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
Expand Down Expand Up @@ -291,27 +291,27 @@ void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
}

#if FD_HAS_AVX
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#else
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
void fd_blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#else
void fd_blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#endif /* FD_HAS_AVX */

void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out) {
void fd_blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out) {
while (num_inputs >= DEGREE) {
blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
flags_start, flags_end, out);
fd_blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
flags_start, flags_end, out);
if (increment_counter) {
counter += DEGREE;
}
Expand All @@ -320,11 +320,11 @@ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
out = &out[DEGREE * BLAKE3_OUT_LEN];
}
#if FD_HAS_AVX
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
fd_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
#else
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
fd_blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
#endif
}
Loading

0 comments on commit eef2f7e

Please sign in to comment.