From f6f7d8fb2d81c5b28217bd881e30ee7691cfe59a Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Tue, 28 Jan 2025 13:38:37 +0100 Subject: [PATCH 1/6] Fix of 'no memory' error --- unpack.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/unpack.c b/unpack.c index 8756e91..b14242f 100644 --- a/unpack.c +++ b/unpack.c @@ -137,11 +137,17 @@ char* write_to_temp_file(uint8_t* data, size_t size) { free(temp_file_name); return NULL; } + if (fclose(temp_file) != 0) { + perror("Failed to close temporary file"); + unlink(temp_file_name); + free(temp_file_name); + return NULL; + } - fclose(temp_file); return temp_file_name; } + EMSCRIPTEN_KEEPALIVE ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { struct archive* archive; @@ -152,14 +158,15 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { char buff[buffsize]; size_t total_size = 0; const char *error_message; + size_t files_struct_length = 1; - FileData* files = malloc(sizeof(FileData) * (files_count + 1)); - + FileData* files = malloc(sizeof(FileData) * files_struct_length); if (!files) { printf("Failed to allocate memory for files array\n"); return NULL; } + ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); if (!result) { free(files); @@ -177,19 +184,29 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { error_message = "Failed to create temporary file"; return error_handler(result, error_message, archive); } - archive = archive_read_new(); archive_read_support_filter_all(archive); archive_read_support_format_raw(archive); - if (archive_read_open_filename(archive, temp_file_name, inputSize) != ARCHIVE_OK) { + if (archive_read_open_filename(archive, temp_file_name, 1024 * 1024) != ARCHIVE_OK) { unlink(temp_file_name); free(temp_file_name); free(files); return error_handler(result, archive_error_string(archive), archive); } - while (archive_read_next_header(archive, &entry) == 
ARCHIVE_OK) { + + if (files_count == files_struct_length) { + files_struct_length *= 2; + FileData* temp = realloc(files, sizeof(FileData) * files_struct_length); + if (!temp) { + free(files); + error_message = "Failed to reallocate memory for files"; + return error_handler(result, error_message, archive); + } + files = temp; + } + const char* filename = archive_entry_pathname(entry); if (!filename) filename = "decompression"; From 656d17cbd5d74b0b79d82e17c37f59110feec46c Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Thu, 30 Jan 2025 14:07:19 +0100 Subject: [PATCH 2/6] Fix memory leaks for big files during decompression --- unpack.c | 143 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 35 deletions(-) diff --git a/unpack.c b/unpack.c index b14242f..773d242 100644 --- a/unpack.c +++ b/unpack.c @@ -5,6 +5,7 @@ #include #include #include +#include typedef struct { char* filename; @@ -19,6 +20,12 @@ typedef struct { char error_message[256]; } ExtractedArchive; +double get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec) + tv.tv_usec / 1000000.0; +} + ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) { if (!result || !archive) { @@ -113,7 +120,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { } char* write_to_temp_file(uint8_t* data, size_t size) { - char* temp_file_name = strdup("/tmp/decompressionXXXXXX"); + char* temp_file_name = strdup("/tmp/dddhecompressionXXXXXX"); int fd = mkstemp(temp_file_name); if (fd == -1) { perror("Failed to create temporary file for decompression file"); @@ -148,28 +155,41 @@ char* write_to_temp_file(uint8_t* data, size_t size) { } +EMSCRIPTEN_KEEPALIVE EMSCRIPTEN_KEEPALIVE ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { struct archive* archive; struct archive_entry* entry; size_t files_count = 0; + double start_time, end_time; - const size_t 
buffsize = 64 * 1024; - char buff[buffsize]; - size_t total_size = 0; + size_t total_size = 0; const char *error_message; size_t files_struct_length = 1; + size_t compression_ratio = 10; + size_t estimated_decompressed_size = inputSize * compression_ratio; + const size_t buffsize = estimated_decompressed_size; + char* buff = (char*)malloc(buffsize); + + if (!buff) { + printf("Failed to allocate memory for decompression buffer\n"); + return NULL; + } + + printf("inputSize %zd\n", inputSize); + printf("estimated_decompressed_size %zd\n", estimated_decompressed_size); FileData* files = malloc(sizeof(FileData) * files_struct_length); if (!files) { printf("Failed to allocate memory for files array\n"); + free(buff); return NULL; } - - + ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); if (!result) { free(files); + free(buff); return NULL; } @@ -181,81 +201,134 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { char* temp_file_name = write_to_temp_file(inputData, inputSize); if (!temp_file_name) { free(files); + free(buff); error_message = "Failed to create temporary file"; return error_handler(result, error_message, archive); } + archive = archive_read_new(); archive_read_support_filter_all(archive); archive_read_support_format_raw(archive); - - if (archive_read_open_filename(archive, temp_file_name, 1024 * 1024) != ARCHIVE_OK) { + + if (archive_read_open_filename(archive, temp_file_name, buffsize) != ARCHIVE_OK) { unlink(temp_file_name); free(temp_file_name); free(files); + free(buff); return error_handler(result, archive_error_string(archive), archive); } + + ssize_t iteration = 0; while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { - - if (files_count == files_struct_length) { - files_struct_length *= 2; - FileData* temp = realloc(files, sizeof(FileData) * files_struct_length); - if (!temp) { - free(files); - error_message = "Failed to reallocate memory for files"; + if (files_count + 1 > 
files_struct_length) { + files_struct_length *= 2; // double the length + FileData* oldfiles = files; + files= realloc(files, sizeof(FileData) * files_struct_length); + if (!files) { + unlink(temp_file_name); + free(temp_file_name); + result->fileCount = files_count; + result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed. + error_message = "Memory allocation error for file data."; return error_handler(result, error_message, archive); - } - files = temp; + } } const char* filename = archive_entry_pathname(entry); - if (!filename) filename = "decompression"; + if (!filename) filename = "data"; files[files_count].filename = strdup(filename); - files[files_count].data = NULL; - files[files_count].data_size = 0; + files[files_count].data = malloc(estimated_decompressed_size); + + if (!files[files_count].data) { + free(files[files_count].filename); + unlink(temp_file_name); + free(temp_file_name); + free(buff); + files[files_count].filename = NULL; + result->fileCount = files_count; + result->files = files; // otherwise memory is lost, alternatively also everything can be freed. 
+ + error_message = "Memory allocation error for file contents."; + return error_handler(result, error_message, archive); + } + files[files_count].data_size = buffsize; ssize_t ret; + total_size = 0; - for (;;) { + start_time = get_time(); + while (1) { + iteration++; ret = archive_read_data(archive, buff, buffsize); if (ret < 0) { for (size_t i = 0; i <= files_count; i++) { - free(files[i].filename); - free(files[i].data); - } - free(files); - result->files = NULL; - return error_handler(result, archive_error_string(archive), archive); + free(files[i].filename); + free(files[i].data); + } + free(files); + free(buff); + unlink(temp_file_name); + free(temp_file_name); + result->files = NULL; + result = error_handler(result, archive_error_string(archive), archive); + break; } if (ret == 0) { break; } + size_t sum = total_size + ret; + printf("sum %zd\n", sum); + + if (total_size + ret > estimated_decompressed_size) { + + size_t new_size = estimated_decompressed_size * 1.5; + void* new_data = realloc(files[files_count].data, new_size);//? 
+ if (!new_data) { + for (size_t i = 0; i <= files_count; i++) { + free(files[i].filename); + free(files[i].data); + } + + result->files = NULL; + result->fileCount = 0; + free(files); + free(buff); + unlink(temp_file_name); + free(temp_file_name); + + error_message = "Memory allocation error"; + result = error_handler(result, error_message, archive); + break; + } - void* new_data = realloc(files[files_count].data, total_size + ret); - if (!new_data) { - free(files[files_count].data); - error_message = "Memory allocation error"; - return error_handler(result, error_message, archive); + files[files_count].data = new_data; + estimated_decompressed_size = new_size; } - files[files_count].data = new_data; memcpy(files[files_count].data + total_size, buff, ret); total_size += ret; } + end_time = get_time(); + printf("Execution time: %f seconds\n", end_time - start_time); + files[files_count].data_size = total_size; files_count++; } - + printf("Test: %zd\n", test); + printf("Iteration %zd\n", iteration); archive_read_free(archive); unlink(temp_file_name); free(temp_file_name); + free(buff); result->files = files; result->fileCount = files_count; - result->status = 1; + result->status = 1; return result; } + EMSCRIPTEN_KEEPALIVE ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) { if (!decompressionOnly) { From 222404f4378c99804ec8ca062da0e36a1ecee866 Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Thu, 30 Jan 2025 14:24:13 +0100 Subject: [PATCH 3/6] Fix typos --- unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unpack.c b/unpack.c index 773d242..d218060 100644 --- a/unpack.c +++ b/unpack.c @@ -261,6 +261,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { while (1) { iteration++; ret = archive_read_data(archive, buff, buffsize); + printf("ret %zd\n", ret); if (ret < 0) { for (size_t i = 0; i <= files_count; i++) { free(files[i].filename); @@ -315,7 +316,6 @@ ExtractedArchive* 
decompression(uint8_t* inputData, size_t inputSize) { files[files_count].data_size = total_size; files_count++; } - printf("Test: %zd\n", test); printf("Iteration %zd\n", iteration); archive_read_free(archive); unlink(temp_file_name); From d64a5463c5732a22d9c59ece6c81c7ce31aa6c4b Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Thu, 30 Jan 2025 14:25:59 +0100 Subject: [PATCH 4/6] Fix typos --- unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unpack.c b/unpack.c index d218060..7c169fb 100644 --- a/unpack.c +++ b/unpack.c @@ -120,7 +120,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { } char* write_to_temp_file(uint8_t* data, size_t size) { - char* temp_file_name = strdup("/tmp/dddhecompressionXXXXXX"); + char* temp_file_name = strdup("/tmp/decompressionXXXXXX"); int fd = mkstemp(temp_file_name); if (fd == -1) { perror("Failed to create temporary file for decompression file"); From 8c76165e092fb8e900d80578bb7c34dd6c26b977 Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Thu, 30 Jan 2025 17:18:16 +0100 Subject: [PATCH 5/6] Reduce iterations --- unpack.c | 60 ++++++++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/unpack.c b/unpack.c index 7c169fb..891777d 100644 --- a/unpack.c +++ b/unpack.c @@ -5,7 +5,6 @@ #include #include #include -#include typedef struct { char* filename; @@ -20,12 +19,6 @@ typedef struct { char error_message[256]; } ExtractedArchive; -double get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return (tv.tv_sec) + tv.tv_usec / 1000000.0; -} - ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) { if (!result || !archive) { @@ -41,7 +34,7 @@ ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_mess } EMSCRIPTEN_KEEPALIVE -ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { +ExtractedArchive* 
extract_archive(uint8_t* input_data, size_t input_size ) { struct archive* archive; struct archive_entry* entry; size_t files_struct_length = 100; @@ -63,7 +56,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { archive_read_support_filter_all(archive); archive_read_support_format_all(archive); - if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) { + if (archive_read_open_memory(archive, input_data, input_size) != ARCHIVE_OK) { return error_handler(result,archive_error_string(archive), archive); } files = malloc(sizeof(FileData) * files_struct_length); @@ -154,20 +147,16 @@ char* write_to_temp_file(uint8_t* data, size_t size) { return temp_file_name; } - -EMSCRIPTEN_KEEPALIVE EMSCRIPTEN_KEEPALIVE -ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { +ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { struct archive* archive; struct archive_entry* entry; size_t files_count = 0; - double start_time, end_time; - size_t total_size = 0; const char *error_message; size_t files_struct_length = 1; size_t compression_ratio = 10; - size_t estimated_decompressed_size = inputSize * compression_ratio; + size_t estimated_decompressed_size = input_size * compression_ratio; const size_t buffsize = estimated_decompressed_size; char* buff = (char*)malloc(buffsize); @@ -176,9 +165,6 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { return NULL; } - printf("inputSize %zd\n", inputSize); - printf("estimated_decompressed_size %zd\n", estimated_decompressed_size); - FileData* files = malloc(sizeof(FileData) * files_struct_length); if (!files) { printf("Failed to allocate memory for files array\n"); @@ -198,7 +184,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { result->status = 1; result->error_message[0] = '\0'; - char* temp_file_name = write_to_temp_file(inputData, inputSize); + char* temp_file_name = write_to_temp_file(input_data, 
input_size); if (!temp_file_name) { free(files); free(buff); @@ -218,7 +204,6 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { return error_handler(result, archive_error_string(archive), archive); } - ssize_t iteration = 0; while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { if (files_count + 1 > files_struct_length) { files_struct_length *= 2; // double the length @@ -228,7 +213,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { unlink(temp_file_name); free(temp_file_name); result->fileCount = files_count; - result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed. + result->files = oldfiles; error_message = "Memory allocation error for file data."; return error_handler(result, error_message, archive); } @@ -247,7 +232,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { free(buff); files[files_count].filename = NULL; result->fileCount = files_count; - result->files = files; // otherwise memory is lost, alternatively also everything can be freed. + result->files = files; error_message = "Memory allocation error for file contents."; return error_handler(result, error_message, archive); @@ -257,11 +242,9 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { ssize_t ret; total_size = 0; - start_time = get_time(); while (1) { - iteration++; ret = archive_read_data(archive, buff, buffsize); - printf("ret %zd\n", ret); + if (ret < 0) { for (size_t i = 0; i <= files_count; i++) { free(files[i].filename); @@ -278,11 +261,9 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { if (ret == 0) { break; } + size_t sum = total_size + ret; - printf("sum %zd\n", sum); - - if (total_size + ret > estimated_decompressed_size) { - + if (sum > estimated_decompressed_size) { size_t new_size = estimated_decompressed_size * 1.5; void* new_data = realloc(files[files_count].data, new_size);//? 
if (!new_data) { @@ -297,7 +278,6 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { free(buff); unlink(temp_file_name); free(temp_file_name); - error_message = "Memory allocation error"; result = error_handler(result, error_message, archive); break; @@ -305,23 +285,25 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { files[files_count].data = new_data; estimated_decompressed_size = new_size; + + } else if (sum>0 && sum < estimated_decompressed_size) { + memcpy(files[files_count].data + total_size, buff, ret); + total_size += ret; + break; } memcpy(files[files_count].data + total_size, buff, ret); total_size += ret; } - end_time = get_time(); - printf("Execution time: %f seconds\n", end_time - start_time); + files[files_count].data_size = total_size; files_count++; + free(buff); } - printf("Iteration %zd\n", iteration); archive_read_free(archive); unlink(temp_file_name); free(temp_file_name); - free(buff); - result->files = files; result->fileCount = files_count; result->status = 1; @@ -330,11 +312,11 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { EMSCRIPTEN_KEEPALIVE -ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) { - if (!decompressionOnly) { - return extract_archive(inputData, inputSize); +ExtractedArchive* extract(uint8_t* input_data, size_t input_size, bool decompression_only ) { + if (!decompression_only) { + return extract_archive(input_data, input_size); } else { - return decompression(inputData, inputSize); + return decompression(input_data, input_size); } } From 7c723277e491b5fd75be9dee9241a965c1874cb0 Mon Sep 17 00:00:00 2001 From: Anastasiia Sliusar Date: Fri, 31 Jan 2025 12:57:49 +0100 Subject: [PATCH 6/6] Fixing memory leaks --- unpack.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/unpack.c b/unpack.c index 891777d..bf15025 100644 --- a/unpack.c +++ b/unpack.c @@ -5,6 +5,7 @@ 
#include #include #include +#include typedef struct { char* filename; @@ -19,6 +20,13 @@ typedef struct { char error_message[256]; } ExtractedArchive; + +double get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec) + tv.tv_usec / 1000000.0; +} + ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) { if (!result || !archive) { @@ -157,9 +165,15 @@ ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { size_t files_struct_length = 1; size_t compression_ratio = 10; size_t estimated_decompressed_size = input_size * compression_ratio; - const size_t buffsize = estimated_decompressed_size; + + /* 64KB. archive_read_data can also realloc while reading data, + so the data chunk size should be chosen carefully. There are memory leaks with a 4MB data chunk size + */ + const size_t buffsize = 65536; char* buff = (char*)malloc(buffsize); + double start_time, end_time; + if (!buff) { printf("Failed to allocate memory for decompression buffer\n"); return NULL; @@ -195,7 +209,11 @@ ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { archive = archive_read_new(); archive_read_support_filter_all(archive); archive_read_support_format_raw(archive); - + + /* Putting buffer size allows libarchive to read a file by data chunks. 
+ This reduces memory leaks on libarchive side + */ + if (archive_read_open_filename(archive, temp_file_name, buffsize) != ARCHIVE_OK) { unlink(temp_file_name); free(temp_file_name); @@ -203,7 +221,7 @@ ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { free(buff); return error_handler(result, archive_error_string(archive), archive); } - + while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { if (files_count + 1 > files_struct_length) { files_struct_length *= 2; // double the length @@ -286,16 +304,11 @@ ExtractedArchive* decompression(uint8_t* input_data, size_t input_size) { files[files_count].data = new_data; estimated_decompressed_size = new_size; - } else if (sum>0 && sum < estimated_decompressed_size) { - memcpy(files[files_count].data + total_size, buff, ret); - total_size += ret; - break; } memcpy(files[files_count].data + total_size, buff, ret); total_size += ret; } - files[files_count].data_size = total_size; files_count++;