From 8405dfaae67901f6efb701756df298d0b93786da Mon Sep 17 00:00:00 2001 From: Jeff Kent Date: Wed, 24 Feb 2021 17:53:01 -0600 Subject: [PATCH] binary incompat, add sorted offset table for iterating files, limit num_objects to 64k --- README.md | 22 +++-- cmake/include.cmake | 36 +++++--- docs/api-reference/fs.rst | 1 + espfs_defaults.yaml | 13 +-- include/libespfs/espfs.h | 15 +++- include/libespfs/espfs_format.h | 21 +++-- src/espfs.c | 37 +++++--- src/espfs_priv.h | 1 + tools/mkespfsimage.py | 147 ++++++++++++++++++++------------ tools/pathlist.py | 57 +++++++++++++ 10 files changed, 254 insertions(+), 96 deletions(-) create mode 100644 tools/pathlist.py diff --git a/README.md b/README.md index b7b75e1..1717972 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# About libespfs +# About Libespfs Libespfs is a read-only filesystem component designed for [ESP-IDF](https://github.com/espressif/esp-idf) and @@ -13,7 +13,7 @@ Libespfs can be used in other projects though, and works fine on Linux. There is a test Linux program in `tools/test` to read files from an espfs image. -# Getting started +# Getting Started To use this component, make a components directory in your project's root directory and within that directory run: @@ -25,17 +25,26 @@ You can generate a filesystem using `tools/mkespfsiage.py`. The tool takes two arguments, ROOT, the directory containing the files to generate from, and IMAGE, the output file for the image. The script references an espfs.yaml file in the image ROOT, with the default settings to not add it to the image. The -yaml file the various preprocessors and compressors to run while building the -image. Example: +yaml file defines filters for the various preprocessors and compressors to run +while building the image. The espfs.yaml file overrides any settings in the +espfs_defaults.yaml file included with libespfs. 
Example: ```yaml -paths: +filters: '*.html': ['html-minifier', 'gzip'] + '*.zip': no-compression '*': heatshrink ``` +There are 5 preprocessors (babel-convert, babel-minify, html-minifier, +uglifycss, and uglifyjs) and there are two compressors (gzip and heatshrink). +These can be prefixed with 'no-' to disable them in more specific filters. +There are also the commands 'discard' to prevent files from being added to the +image, 'skip' to cancel all processing, 'no-preprocessing', and +'no-compression'. + You can add your own preprocessors as well. Look at the espfs_default.yaml -within the component as an example. +within libespfs for an example. ## Building an espfs image @@ -118,6 +127,7 @@ fclose) functions to access files. ## Raw interface ```C +const char *espfs_get_path(espfs_fs_t *fs, uint16_t index); bool espfs_stat(espfs_fs_t *fs, const char *path, espfs_stat_t *s); espfs_file_t *espfs_fopen(espfs_fs_t *fs, const char *path); void espfs_fclose(espfs_file_t *f); diff --git a/cmake/include.cmake b/cmake/include.cmake index e719c72..4fe03a9 100644 --- a/cmake/include.cmake +++ b/cmake/include.cmake @@ -50,10 +50,18 @@ function(define_target_espfs target dir output) get_filename_component(output_dir ${output} DIRECTORY) add_custom_target(${target} - BYPRODUCTS ${output} + BYPRODUCTS ${dir}/espfs.paths DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/libespfs.dir/requirements.stamp COMMAND ${CMAKE_COMMAND} -E make_directory ${output_dir} + COMMAND ${python} ${libespfs_DIR}/tools/pathlist.py ${dir} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Updating espfs.paths for ${target}" + VERBATIM + ) + + add_custom_command(OUTPUT ${output} COMMAND ${python} ${libespfs_DIR}/tools/mkespfsimage.py ${dir} ${output} + DEPENDS ${dir}/espfs.paths WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMENT "Building espfs binary ${output}" USES_TERMINAL @@ -72,30 +80,38 @@ function(target_add_espfs target name) file(RELATIVE_PATH dir ${PROJECT_SOURCE_DIR} ${dir}) endif() - set(output
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${name}.bin) + set(output ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${name}) file(RELATIVE_PATH rel_output ${CMAKE_CURRENT_BINARY_DIR} ${output}) get_filename_component(output_dir ${output} DIRECTORY) - add_custom_target(espfs_image_${name} ALL - BYPRODUCTS ${output} + add_custom_target(espfs_image_${name} + BYPRODUCTS ${dir}/espfs.paths DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/libespfs.dir/requirements.stamp COMMAND ${CMAKE_COMMAND} -E make_directory ${output_dir} - COMMAND ${python} ${libespfs_DIR}/tools/mkespfsimage.py ${dir} ${output} + COMMAND ${python} ${libespfs_DIR}/tools/pathlist.py ${dir} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Updating pathlist for espfs_image_${name}" + VERBATIM + ) + add_dependencies(${target} espfs_image_${name}) + + add_custom_command(OUTPUT ${output}.bin + COMMAND ${python} ${libespfs_DIR}/tools/mkespfsimage.py ${dir} ${output}.bin + DEPENDS ${dir}/espfs.paths WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMENT "Building espfs binary ${rel_output}" USES_TERMINAL VERBATIM ) - add_dependencies(${target} espfs_image_${name}) - add_custom_command(OUTPUT ${output}.c - COMMAND ${python} ${libespfs_DIR}/tools/bin2c.py ${output} ${output}.c - DEPENDS ${output} + add_custom_command(OUTPUT ${output}.bin.c + COMMAND ${python} ${libespfs_DIR}/tools/bin2c.py ${output}.bin ${output}.bin.c + DEPENDS ${output}.bin COMMENT "Building source file ${rel_output}.c" VERBATIM ) - target_sources(${target} PRIVATE ${output}.c) + target_sources(${target} PRIVATE ${output}.bin.c) endfunction() function(target_config_vars) diff --git a/docs/api-reference/fs.rst b/docs/api-reference/fs.rst index ee67ff9..c8a4208 100644 --- a/docs/api-reference/fs.rst +++ b/docs/api-reference/fs.rst @@ -8,6 +8,7 @@ Functions .. doxygenfunction:: espfs_init .. doxygenfunction:: espfs_deinit +.. doxygenfunction:: espfs_get_path .. 
doxygenfunction:: espfs_stat Structures diff --git a/espfs_defaults.yaml b/espfs_defaults.yaml index 333b1c1..93ea839 100644 --- a/espfs_defaults.yaml +++ b/espfs_defaults.yaml @@ -1,7 +1,4 @@ preprocessors: - gzip: - level: 9 - babel-convert: npm: - '@babel/core' @@ -26,13 +23,19 @@ preprocessors: command: npx uglifyjs compressors: + gzip: + level: 9 + heatshrink: window_sz2: 11 lookahead_sz2: 4 -paths: +filters: + 'espfs.paths': discard 'espfs.yaml': discard '*.css': uglifycss '*.html': html-minifier '*.js': [babel-convert, uglifyjs] - '*': heatshrink + '*.woff': no-compression + '*.woff2': no-compression + '*': [cache, heatshrink] diff --git a/include/libespfs/espfs.h b/include/libespfs/espfs.h index 2c35446..e4099da 100644 --- a/include/libespfs/espfs.h +++ b/include/libespfs/espfs.h @@ -35,7 +35,8 @@ enum espfs_stat_type_t { * \brief Object flags */ enum espfs_flags_t { - ESPFS_FLAG_GZIP = (1 << 0), + ESPFS_FLAG_GZIP = (1 << 0), + ESPFS_FLAG_CACHE = (1 << 1), }; /** @@ -61,6 +62,7 @@ struct espfs_config_t { * \brief Structure filled by \a espfs_stat and \a espfs_fstat functions */ struct espfs_stat_t { + uint16_t index; /**< file index */ espfs_stat_type_t type; /**< file type */ espfs_flags_t flags; /**< file flags */ espfs_compression_type_t compression; /**< compression type */ @@ -83,6 +85,16 @@ void espfs_deinit( espfs_fs_t *fs /** [in] espfs fs pointer */ ); +/** + * \brief Get path for sorted espfs object index + * + * \return path or NULL if the index is invalid + */ +const char *espfs_get_path( + espfs_fs_t *fs, /** [in] espfs fs pointer */ + uint16_t index /** [in] espfs file index */ +); + /** * \brief Get information about an espfs object * @@ -116,7 +128,6 @@ void espfs_fclose( */ void espfs_fstat( espfs_file_t *f, /** [in] espfs file */ - const char *path, /** [in] espfs path */ espfs_stat_t *s /** [out] stat structure */ ); diff --git a/include/libespfs/espfs_format.h b/include/libespfs/espfs_format.h index 73f2f99..488d4f9 100644 --- 
a/include/libespfs/espfs_format.h +++ b/include/libespfs/espfs_format.h @@ -11,13 +11,13 @@ * \brief Magic number used in the espfs file header */ #define ESPFS_MAGIC 0x2B534645 /** EFS+ */ -#define ESPFS_VERSION_MAJOR 0 -#define ESPFS_VERSION_MINOR 1 +#define ESPFS_VERSION_MAJOR 1 +#define ESPFS_VERSION_MINOR 0 typedef struct espfs_fs_header_t espfs_fs_header_t; typedef struct espfs_hashtable_entry_t espfs_hashtable_entry_t; +typedef struct espfs_sorttable_entry_t espfs_sorttable_entry_t; typedef struct espfs_object_header_t espfs_object_header_t; -typedef struct espfs_dir_header_t espfs_dir_header_t; typedef struct espfs_file_header_t espfs_file_header_t; typedef struct espfs_heatshrink_header_t espfs_heatshrink_header_t; typedef struct espfs_crc32_footer_t espfs_crc32_footer_t; @@ -27,8 +27,9 @@ struct espfs_fs_header_t { uint8_t len; uint8_t version_major; uint16_t version_minor; - uint32_t num_objects; uint32_t binary_len; + uint16_t num_objects; + uint16_t reserved; } __attribute__((packed)); struct espfs_hashtable_entry_t { @@ -36,14 +37,16 @@ struct espfs_hashtable_entry_t { uint32_t offset; } __attribute__((packed)); +struct espfs_sorttable_entry_t { + uint32_t offset; +} __attribute__((packed)); + struct espfs_object_header_t { uint8_t type; uint8_t len; + uint16_t index; uint16_t path_len; -} __attribute__((packed)); - -struct espfs_dir_header_t { - espfs_object_header_t object; + uint16_t reserved; } __attribute__((packed)); struct espfs_file_header_t { @@ -63,4 +66,4 @@ struct espfs_heatshrink_header_t { struct espfs_crc32_footer_t { uint32_t crc32; -} __attribute__((packed)); \ No newline at end of file +} __attribute__((packed)); diff --git a/src/espfs.c b/src/espfs.c index d6253d3..eb9f46b 100644 --- a/src/espfs.c +++ b/src/espfs.c @@ -78,7 +78,9 @@ espfs_fs_t *espfs_init(espfs_config_t *conf) goto err_out; } - fs->hashtable = (void *) fs->header + fs->header->len; + fs->hashtable = (const void *) fs->header + fs->header->len; + fs->sorttable = 
(const void *) fs->hashtable + + (sizeof(espfs_hashtable_entry_t) * fs->header->num_objects); return fs; @@ -99,6 +101,20 @@ void espfs_deinit(espfs_fs_t *fs) free(fs); } +const char *espfs_get_path(espfs_fs_t *fs, uint16_t index) +{ + assert(fs != NULL); + + if (index >= fs->header->num_objects) { + return NULL; + } + + const espfs_sorttable_entry_t *entry = fs->sorttable + index; + const espfs_object_header_t *object = (const void *) fs->header + + entry->offset; + return (const char *) object + object->len; +} + static uint32_t hash_path(const char *path) { uint32_t hash = 5381; @@ -113,7 +129,7 @@ static uint32_t hash_path(const char *path) return hash; } -static void *find_object(espfs_fs_t *fs, const char *path) +static const void *find_object(espfs_fs_t *fs, const char *path) { assert(fs != NULL); @@ -169,7 +185,7 @@ static void *find_object(espfs_fs_t *fs, const char *path) do { if (middle != skip) { object = (void *) fs->header + entry->offset; - if (strcmp(path, (char *) object + object->len) == 0) { + if (strcmp(path, (const char *) object + object->len) == 0) { ESPFS_LOGV(__func__, "object %d", middle); return object; } @@ -186,7 +202,7 @@ bool espfs_stat(espfs_fs_t *fs, const char *path, espfs_stat_t *stat) { assert(fs != NULL); - espfs_object_header_t *object = find_object(fs, path); + const espfs_object_header_t *object = find_object(fs, path); if (object == NULL) { ESPFS_LOGD(__func__, "object not found: %s", path); return false; @@ -194,9 +210,9 @@ bool espfs_stat(espfs_fs_t *fs, const char *path, espfs_stat_t *stat) memset(stat, 0, sizeof(espfs_stat_t)); stat->type = object->type; + stat->index = object->index; if (object->type == ESPFS_TYPE_FILE) { - espfs_file_header_t *fh = (espfs_file_header_t *) object; - + const espfs_file_header_t *fh = (const espfs_file_header_t *) object; stat->flags = fh->flags; stat->compression = fh->compression; stat->size = fh->file_len; @@ -210,13 +226,13 @@ espfs_file_t *espfs_fopen(espfs_fs_t *fs, const char 
*path) { assert(fs != NULL); - espfs_object_header_t *object = find_object(fs, path); + const espfs_object_header_t *object = find_object(fs, path); if ((object == NULL) || (object->type != ESPFS_TYPE_FILE)) { ESPFS_LOGD(__func__, "file not found: %s", path); return NULL; } - espfs_file_header_t *fh = (espfs_file_header_t *) object; + const espfs_file_header_t *fh = (const espfs_file_header_t *) object; espfs_file_t *f = malloc(sizeof(espfs_file_t)); if (f == NULL) { @@ -276,12 +292,13 @@ void espfs_fclose(espfs_file_t *f) free(f); } -void espfs_fstat(espfs_file_t *f, const char *path, espfs_stat_t *stat) +void espfs_fstat(espfs_file_t *f, espfs_stat_t *stat) { assert(f != NULL); memset(stat, 0, sizeof(espfs_stat_t)); stat->type = f->fh->object.type; + stat->index = f->fh->object.index; stat->flags = f->fh->flags; stat->compression = f->fh->compression; stat->size = f->fh->file_len; @@ -355,12 +372,12 @@ ssize_t espfs_fread(espfs_file_t *f, void *buf, size_t len) if (remain == 0) { if (f->file_pos == f->fh->file_len) { - ESPFS_LOGD(__func__, "heatshrink decoder finished"); HSD_finish_res res = heatshrink_decoder_finish(hsd); if (res < 0) { ESPFS_LOGE(__func__, "heatshrink_decoder_finish"); return -1; } + ESPFS_LOGV(__func__, "heatshrink_decoder_finish"); } return decoded; } diff --git a/src/espfs_priv.h b/src/espfs_priv.h index bbed3a6..033b182 100644 --- a/src/espfs_priv.h +++ b/src/espfs_priv.h @@ -22,6 +22,7 @@ struct espfs_fs_t { #endif const espfs_fs_header_t *header; const espfs_hashtable_entry_t *hashtable; + const espfs_sorttable_entry_t *sorttable; }; struct espfs_file_t { diff --git a/tools/mkespfsimage.py b/tools/mkespfsimage.py index ff5f53d..7cf26bb 100644 --- a/tools/mkespfsimage.py +++ b/tools/mkespfsimage.py @@ -16,23 +16,27 @@ script_dir = os.path.dirname(os.path.realpath(__file__)) -espfs_fs_header_t = Struct('= initial_len: + data = initial_data + data_len = initial_len + + if 'no-compression' not in actions: + if 'gzip' in actions and 'no-gzip' 
not in actions: + flags |= ESPFS_FLAG_GZIP + level = config['compressors']['gzip']['level'] + level = min(max(level, 0), 9) + data = gzip.compress(data, level) + elif 'heatshrink' in actions and 'no-heatshrink' not in actions: + compression = ESPFS_COMPRESSION_HEATSHRINK + window_sz2 = config['compressors']['heatshrink']['window_sz2'] + lookahead_sz2 = config['compressors']['heatshrink']['lookahead_sz2'] + data = espfs_heatshrink_header_t.pack(window_sz2, lookahead_sz2, + 0) + heatshrink2.compress(data, window_sz2 = window_sz2, + lookahead_sz2 = lookahead_sz2) data_len = len(data) - if compression and data_len >= file_len: + if data_len >= file_len: + flags &= ~ESPFS_FLAG_GZIP compression = ESPFS_COMPRESSION_NONE data = file_data - data_len = len(data) + data_len = file_len if initial_len < 1024: initial_len_str = '%d B' % (initial_len) @@ -195,8 +216,8 @@ def make_file_object(hash, path, data, actions={}): data = data.ljust((data_len + 3) // 4 * 4, b'\0') header = espfs_object_header_t.pack(ESPFS_TYPE_FILE, espfs_object_header_t.size + espfs_file_header_t.size, - len(path)) + espfs_file_header_t.pack(data_len, file_len, flags, - compression, 0) + index, len(path), 0) + espfs_file_header_t.pack(data_len, + file_len, flags, compression, 0) return header + path + data @@ -212,25 +233,33 @@ def main(): pathlist = make_pathlist(args.ROOT) npmset = set() + index = 0 for entry in pathlist[:]: - _, path, _, type, attributes = entry + path, attributes = entry attributes['actions'] = OrderedDict() - for pattern, actions in config['paths'].items(): + attributes['index'] = index + index += 1 + for pattern, actions in config['filters'].items(): if fnmatch(path, pattern): if 'discard' in actions: + index -= 1 pathlist.remove(entry) break + if 'skip' in actions: + break for action in actions: - if action in ('discard', 'gzip', 'heatshrink'): + if action in ('cache', 'gzip', 'heatshrink', + 'no-cache', 'no-compression', 'no-gzip', + 'no-heatshrink', 'no-preprocessing'): pass - 
elif action not in config['preprocessors']: + elif action in config['preprocessors']: + for npm in config['preprocessors'][action] \ + .get('npm', ()): + npmset.add(npm) + else: print('unknown action %s for %s' % (action, pattern), - file = sys.sterr) + file = sys.stderr) sys.exit(1) - else: - for npm in config['preprocessors'][action].get('npm', - ()): - npmset.add(npm) attributes['actions'][action] = None @@ -239,28 +268,38 @@ def main(): subprocess.check_call('npm install %s' % (npm), shell = True) num_objects = len(pathlist) - offset = espfs_fs_header_t.size + (espfs_hashtable_t.size * num_objects) + offset = espfs_fs_header_t.size + \ + (espfs_hashtable_entry_t.size * num_objects) + \ + (espfs_sorttable_entry_t.size * num_objects) hashtable = b'' + sorttable = bytearray(espfs_sorttable_entry_t.size * num_objects) objects = b'' - for hash, path, abspath, type, attributes in sorted(pathlist): + pathlist = sorted(pathlist, key = lambda e: (e[1]['hash'], e[0])) + for path, attributes in pathlist: + abspath = attributes['path'] + type = attributes['type'] + hash = attributes['hash'] if type == ESPFS_TYPE_DIR: - object = make_dir_object(hash, path) + object = make_dir_object(hash, path, attributes) elif type == ESPFS_TYPE_FILE: with open(abspath, 'rb') as f: data = f.read() - object = make_file_object(hash, path, data, attributes['actions']) + object = make_file_object(hash, path, data, attributes) else: print('unknown object type %d' % (type), file = sys.stderr) sys.exit(1) - hashtable += espfs_hashtable_t.pack(hash, offset) + hashtable += espfs_hashtable_entry_t.pack(hash, offset) + espfs_sorttable_entry_t.pack_into(sorttable, + espfs_sorttable_entry_t.size * attributes['index'], offset) objects += object offset += len(object) binary_len = offset + espfs_crc32_footer_t.size header = espfs_fs_header_t.pack(ESPFS_MAGIC, espfs_fs_header_t.size, - ESPFS_VERSION_MAJOR, ESPFS_VERSION_MINOR, num_objects, binary_len) - binary = header + hashtable + objects + 
ESPFS_VERSION_MAJOR, ESPFS_VERSION_MINOR, binary_len, + num_objects, 0) + binary = header + hashtable + sorttable + objects binary += espfs_crc32_footer_t.pack(crc32(binary) & 0xFFFFFFFF) with open(args.IMAGE, 'wb') as f: diff --git a/tools/pathlist.py b/tools/pathlist.py new file mode 100644 index 0000000..d530362 --- /dev/null +++ b/tools/pathlist.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +import bisect +import os +from argparse import ArgumentParser + +discard = ('espfs.paths',) + +def make_pathlist(root): + pathlist = [] + for dir, _, files in os.walk(root, followlinks=True): + reldir = os.path.relpath(dir, root).replace('\\', '/').lstrip('.') \ + .lstrip('/') + if reldir: + bisect.insort(pathlist, reldir) + for file in files: + relfile = os.path.join(reldir, file).replace('\\', '/') \ + .lstrip('/') + bisect.insort(pathlist, relfile) + + for path in discard: + if path in pathlist: + pathlist.remove(path) + + return pathlist + +def main(): + parser = ArgumentParser() + parser.add_argument('ROOT') + args = parser.parse_args() + + filelist = os.path.join(args.ROOT, 'espfs.paths') + + oldpathstr = '' + oldpathtime = 0 + if os.path.exists(filelist): + with open(filelist) as f: + oldpathstr = f.read() + oldpathtime = os.path.getmtime(filelist) + + pathlist = make_pathlist(args.ROOT) + pathstr = '\n'.join(pathlist) + + update = False + if (oldpathstr != pathstr): + update = True + else: + for path in pathlist: + if os.path.getmtime(os.path.join(args.ROOT, path)) > oldpathtime: + update = True + break + + if update: + with open(filelist, 'w') as f: + f.write(pathstr) + +if __name__ == '__main__': + main()