From 94f1abd82bc52ffa24533c8c99adf52d437754d5 Mon Sep 17 00:00:00 2001 From: Carl Johnsen Date: Wed, 20 Mar 2024 10:20:24 +0100 Subject: [PATCH] #37 store and load partial now handles unaligned requests --- src/lib/cpp/cpu/connected_components.cc | 61 ++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/src/lib/cpp/cpu/connected_components.cc b/src/lib/cpp/cpu/connected_components.cc index 43f4ad7..4e943fa 100644 --- a/src/lib/cpp/cpu/connected_components.cc +++ b/src/lib/cpp/cpu/connected_components.cc @@ -105,13 +105,31 @@ std::vector load_file_strided(const std::string &path, const idx3d &disk_shap return data; } +// Assumes that `dst` and `offset` are each aligned to `disk_block_size` and that `n_elements` is a multiple of `disk_block_size`. template -void load_partial(T *__restrict__ dst, FILE *fp, const int64_t offset, const int64_t n_elements) { +void load_partial_aligned(T *__restrict__ dst, FILE *fp, const int64_t offset, const int64_t n_elements) { fseek(fp, offset*sizeof(T), SEEK_SET); int64_t n = fread((char *) dst, sizeof(T), n_elements, fp); assert(n == n_elements && "Failed to read all elements"); } +template +void load_partial(T *__restrict__ dst, FILE *fp, const int64_t offset, const int64_t n_elements) { + if (offset % disk_block_size == 0 && n_elements % disk_block_size == 0 && (int64_t) dst % disk_block_size == 0) { + load_partial_aligned(dst, fp, offset, n_elements); + return; + } + + // TODO doesn't work when offset > aligned_n_elements; it is built around loading from the beginning of the file. 
+ int64_t + aligned_offset = (offset / disk_block_size) * disk_block_size, + aligned_n_elements = ((n_elements + disk_block_size - 1) / disk_block_size) * disk_block_size; + T *buffer = (T *) aligned_alloc(disk_block_size, aligned_n_elements * sizeof(T)); + load_partial_aligned(buffer, fp, aligned_offset, aligned_n_elements); + memcpy((char *) dst, (char *) buffer + offset - aligned_offset, n_elements*sizeof(T)); + free(buffer); +} + FILE* open_file_read(const std::string &path) { int fd = open(path.c_str(), O_RDONLY | O_DIRECT); return fdopen(fd, "rb"); @@ -218,13 +236,52 @@ void store_file_strided(const std::vector &data, const std::string &path, con store_file_strided(data.data(), path, disk_shape, shape, range, offset_global); } +// Assumes that `src` and `offset` are each aligned to `disk_block_size` and that `n_elements` is a multiple of `disk_block_size`. template -void store_partial(const T *__restrict__ src, FILE *fp, const int64_t offset, const int64_t n_elements) { +void store_partial_aligned(const T *__restrict__ src, FILE *fp, const int64_t offset, const int64_t n_elements) { fseek(fp, offset*sizeof(T), SEEK_SET); int64_t n = fwrite((char *) src, sizeof(T), n_elements, fp); assert(n == n_elements && "Failed to write all elements"); } +template +void store_partial(const T *__restrict__ src, FILE *fp, const int64_t offset, const int64_t n_elements) { + if (offset % disk_block_size == 0 && n_elements % disk_block_size == 0 && (int64_t) src % disk_block_size == 0) { + store_partial_aligned(src, fp, offset, n_elements); + return; + } + + int64_t + buffer_start = offset % disk_block_size, + buffer_end = buffer_start + n_elements, + front_elements = disk_block_size - buffer_start, + back_elements = buffer_end % disk_block_size, + in_between = (n_elements - front_elements) - back_elements, + aligned_start = (offset / disk_block_size) * disk_block_size, + aligned_end = offset + n_elements + (disk_block_size - back_elements), + aligned_n_elements = aligned_end - 
aligned_start, + n_blocks = aligned_n_elements / disk_block_size; + + assert(front_elements + in_between + back_elements == n_elements && "Front, in-between and back elements don't add up to n_elements"); + assert(aligned_n_elements >= n_elements && "Aligned n_elements is smaller than n_elements"); + + T *buffer = (T *) aligned_alloc(disk_block_size, aligned_n_elements * sizeof(T)); + + if (front_elements != 0) { + load_file_no_alloc(buffer, fp, aligned_start, disk_block_size); + } + + if (back_elements != 0) { + load_file_no_alloc(buffer + buffer_end, fp, aligned_start + buffer_end, disk_block_size); + } + + memcpy((char *) buffer + buffer_start, (char *) src, n_elements * sizeof(T)); + + store_partial_aligned(buffer, fp, aligned_start, aligned_n_elements); + + free(buffer); +} + namespace cpu_par { void apply_renaming(std::vector &img, std::vector &to_rename) {