Skip to content

Commit

Permalink
file: fixed the content length inference approach
Browse files Browse the repository at this point in the history
Signed-off-by: Leonardo Alminana <[email protected]>
  • Loading branch information
leonardo-albertovich committed Apr 3, 2024
1 parent 0a7f118 commit 55c578a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 30 deletions.
1 change: 1 addition & 0 deletions include/chunkio/cio_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct cio_file {
HANDLE backing_file;
HANDLE backing_mapping;
#endif
int taint_flag; /* content modification flag */
/* cached addr */
char *st_content;
crc_t crc_cur; /* crc: current value calculated */
Expand Down
65 changes: 38 additions & 27 deletions include/chunkio/cio_file_st.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,36 @@ static inline ssize_t cio_file_st_infer_content_len(char *map, size_t size)
return content_length;
}

/*
 * Store 'len' in the content length field of the mapped file header.
 *
 * The value is written as four consecutive bytes starting at
 * CIO_FILE_CONTENT_LENGTH_OFFSET, most significant byte first.
 */
static inline void cio_file_st_set_content_len(char *map, uint32_t len)
{
    uint8_t *field = (uint8_t *) map + CIO_FILE_CONTENT_LENGTH_OFFSET;

    /* the narrowing cast keeps only the low 8 bits of each shifted value */
    field[0] = (uint8_t) (len >> 24);
    field[1] = (uint8_t) (len >> 16);
    field[2] = (uint8_t) (len >> 8);
    field[3] = (uint8_t) len;
}

/* Get content length */
static inline ssize_t cio_file_st_get_content_len(char *map, size_t size,
size_t page_size)
static inline ssize_t cio_file_st_get_content_len(char *map,
size_t size,
size_t page_size,
int tainted_data_flag)
{
uint8_t *content_length_buffer;
uint8_t *content_buffer;
ssize_t content_length;
ssize_t content_offset;

if (size < CIO_FILE_HEADER_MIN) {
return -1;
}

content_offset = CIO_FILE_CONTENT_OFFSET + 2 + cio_file_st_get_meta_len(map);

content_length_buffer = (uint8_t *) &map[CIO_FILE_CONTENT_LENGTH_OFFSET];

content_length = (ssize_t) (((uint32_t) content_length_buffer[0]) << 24) |
Expand All @@ -134,37 +153,29 @@ static inline ssize_t cio_file_st_get_content_len(char *map, size_t size,
* previous versions of chunkio that didn't include the content length
* as part of the headers.
*
* The reason why we need to ensure that the file size is larger than 4096
* is that this is the minimal expected page size which is the unit used
* to initialize chunk files when they are created.
*
* In doing so, we effectively avoid returning bogus results when loading
* newly created, non trimmed files while at the same time retaining the
* capability of loading legacy files (that don't have a content size)
* that are larger than 4096 bytes.
* tainted_data_flag is used to differentiate non-trimmed files being
* loaded from files whose chunk data grew over the threshold and was
* then shrunk by the filter stack.
*
* The only caveat is that trimmed files
* Because even when the content size is set to zero the data is not
* zeroed out (nor is the file shrunk), we can compare the first
* byte of the content section against zero to ensure that it's a
* valid msgpack serialized payload.
*/
if (content_length == 0 &&
size > 0 &&
size != page_size) {
content_length = cio_file_st_infer_content_len(map, size);
}

return content_length;
}
if (!tainted_data_flag &&
content_length == 0 &&
size > content_offset) {
content_buffer = (uint8_t *) &map[content_offset];

/* Set content length */
static inline void cio_file_st_set_content_len(char *map, uint32_t len)
{
uint8_t *content_length_buffer;
if (content_buffer[0] != 0x00) {
content_length = cio_file_st_infer_content_len(map, size);

content_length_buffer = (uint8_t *) &map[CIO_FILE_CONTENT_LENGTH_OFFSET];
cio_file_st_set_content_len(map, content_length);
}
}

content_length_buffer[0] = (uint8_t) ((len & 0xFF000000) >> 24);
content_length_buffer[1] = (uint8_t) ((len & 0x00FF0000) >> 16);
content_length_buffer[2] = (uint8_t) ((len & 0x0000FF00) >> 8);
content_length_buffer[3] = (uint8_t) ((len & 0x000000FF) >> 0);
return content_length;
}

#endif
13 changes: 10 additions & 3 deletions src/cio_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,15 @@ void cio_file_calculate_checksum(struct cio_file *cf, crc_t *out)

content_length = cio_file_st_get_content_len(cf->map,
cf->fs_size,
cf->page_size);
cf->page_size,
cf->taint_flag);

if (content_length > 0) {
len += content_length;
}

in_data = (unsigned char *) cf->map + CIO_FILE_CONTENT_OFFSET;

val = cio_crc32_update(cf->crc_cur, in_data, len);
*out = val;
}
Expand Down Expand Up @@ -236,7 +238,8 @@ static int cio_file_format_check(struct cio_chunk *ch,
/* Expected / logical file size verification */
content_length = cio_file_st_get_content_len(cf->map,
cf->fs_size,
cf->page_size);
cf->page_size,
cf->taint_flag);

if (content_length == -1) {
cio_log_debug(ch->ctx, "[cio file] truncated header (%zu / %zu) %s",
Expand Down Expand Up @@ -420,7 +423,8 @@ static int mmap_file(struct cio_ctx *ctx, struct cio_chunk *ch, size_t size)
if (fs_size > 0) {
content_size = cio_file_st_get_content_len(cf->map,
fs_size,
cf->page_size);
cf->page_size,
cf->taint_flag);

if (content_size == -1) {
cio_error_set(ch, CIO_ERR_BAD_FILE_SIZE);
Expand Down Expand Up @@ -653,6 +657,7 @@ struct cio_file *cio_file_open(struct cio_ctx *ctx,
cf->realloc_size = CIO_REALLOC_HINT_MIN;
}

cf->taint_flag = CIO_FALSE;
cf->st_content = NULL;
cf->crc_cur = cio_crc32_init();
cf->path = path;
Expand Down Expand Up @@ -1028,6 +1033,8 @@ int cio_file_write(struct cio_chunk *ch, const void *buf, size_t count)

cio_file_st_set_content_len(cf->map, cf->data_size);

cf->taint_flag = CIO_TRUE;

return 0;
}

Expand Down

0 comments on commit 55c578a

Please sign in to comment.