From 2af422e61e854915676212077131026357992545 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Thu, 1 Jul 2021 15:25:02 -0500 Subject: [PATCH 1/2] feat: add optional sha256 to example manifest; this can prevent the upcoming benchmarking tool from pounding relentlessly on the activitysim_resources repo by caching the large file downloads --- activitysim/cli/create.py | 44 ++++++++++++++++++---- activitysim/examples/example_manifest.yaml | 6 ++- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/activitysim/cli/create.py b/activitysim/cli/create.py index a0e9f638d..577843cc9 100644 --- a/activitysim/cli/create.py +++ b/activitysim/cli/create.py @@ -5,6 +5,7 @@ import glob import pkg_resources import yaml +import hashlib PACKAGE = 'activitysim' EXAMPLES_DIR = 'examples' @@ -116,17 +117,22 @@ def get_example(example_name, destination): # split include string into source/destination paths items = item.split() assets = items[0] - if len(items) == 2: + if len(items) == 3: + target_path = os.path.join(dest_path, items[1]) + sha256 = items[-1] + elif len(items) == 2: target_path = os.path.join(dest_path, items[-1]) + sha256 = None else: target_path = dest_path + sha256 = None if assets.startswith('http'): - download_asset(assets, target_path) + download_asset(assets, target_path, sha256) else: for asset_path in glob.glob(_example_path(assets)): - copy_asset(asset_path, target_path) + copy_asset(asset_path, target_path, dirs_exist_ok=True) print(f'copied! 
new project files are in {os.path.abspath(dest_path)}') @@ -135,22 +141,46 @@ def get_example(example_name, destination): print(instructions) -def copy_asset(asset_path, target_path): +def copy_asset(asset_path, target_path, dirs_exist_ok=False): print(f'copying {os.path.basename(asset_path)} ...') if os.path.isdir(asset_path): target_path = os.path.join(target_path, os.path.basename(asset_path)) - shutil.copytree(asset_path, target_path) + shutil.copytree(asset_path, target_path, dirs_exist_ok=dirs_exist_ok) else: shutil.copy(asset_path, target_path) -def download_asset(url, target_path): +def download_asset(url, target_path, sha256=None): - print(f'downloading {os.path.basename(target_path)} ...') + if sha256 and os.path.isfile(target_path): + computed_sha256 = sha256_checksum(target_path) + if sha256 == computed_sha256: + print(f'not re-downloading existing {os.path.basename(target_path)} ...') + return + else: + print(f're-downloading existing {os.path.basename(target_path)} ...') + print(f' expected checksum {sha256}') + print(f' computed checksum {computed_sha256}') + else: + print(f'downloading {os.path.basename(target_path)} ...') with requests.get(url, stream=True) as r: r.raise_for_status() with open(target_path, 'wb') as f: for chunk in r.iter_content(chunk_size=None): f.write(chunk) + computed_sha256 = sha256_checksum(target_path) + if sha256 and sha256 != computed_sha256: + raise ValueError( + f"downloaded {os.path.basename(target_path)} has incorrect checksum\n" + f" expected checksum {sha256}\n" + f" computed checksum {computed_sha256}" + ) + +def sha256_checksum(filename, block_size=65536): + sha256 = hashlib.sha256() + with open(filename, 'rb') as f: + for block in iter(lambda: f.read(block_size), b''): + sha256.update(block) + return sha256.hexdigest() diff --git a/activitysim/examples/example_manifest.yaml b/activitysim/examples/example_manifest.yaml index 0bae5f971..400b7053f 100644 --- a/activitysim/examples/example_manifest.yaml +++ 
b/activitysim/examples/example_manifest.yaml @@ -32,16 +32,20 @@ include: - example_mtc/configs - example_mtc/configs_mp - - example_mtc/data + # example_mtc/data - example_mtc/output - https://media.githubusercontent.com/media/activitysim/activitysim_resources/master/mtc_data_full/skims.omx data/skims.omx + 04bddb2dd6b829a2ce25a27369d3276143fa9a354989ebd30ed9bba92f8e9bfb - https://media.githubusercontent.com/media/activitysim/activitysim_resources/master/mtc_data_full/households.csv data/households.csv + 77bb2870677ebb430f3d7a813528445b1aea6d66096b9c03ace7201b901a527c - https://media.githubusercontent.com/media/activitysim/activitysim_resources/master/mtc_data_full/persons.csv data/persons.csv + 9b7e1c9972d5f16e06cf5baf8d552935cb4c9745c4da1d5743f8a76a8f5f5655 - https://media.githubusercontent.com/media/activitysim/activitysim_resources/master/mtc_data_full/land_use.csv data/land_use.csv + fac71207925a34c32b956632fe375814e42860624a99f88401c42317af0fc203 - name: example_mtc_sf description: San Francisco MTC dataset with 190 zones, 400k households and 900k persons From e5a53ecc43cdc19f78b750277d2b3bbf89bede74 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Thu, 1 Jul 2021 18:04:33 -0500 Subject: [PATCH 2/2] make target path as needed --- activitysim/cli/create.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/cli/create.py b/activitysim/cli/create.py index 577843cc9..0dab8aead 100644 --- a/activitysim/cli/create.py +++ b/activitysim/cli/create.py @@ -153,7 +153,7 @@ def copy_asset(asset_path, target_path, dirs_exist_ok=False): def download_asset(url, target_path, sha256=None): - + os.makedirs(os.path.dirname(target_path), exist_ok=True) if sha256 and os.path.isfile(target_path): computed_sha256 = sha256_checksum(target_path) if sha256 == computed_sha256: