"""Download and unpack the ML-Mondays 'image recognition' datasets.

Provenance: ``download_data.py`` as added by commit cc35fd2 ("9/22/20",
dbuscombe-usgs, 22 Sep 2020) of ``mlmondays_data_imrecog``.

Usage (as described in the repository README):

1. Activate the ``mlmondays`` conda environment::

       conda activate mlmondays

2. Run the provided script::

       python download_data.py

3. Deactivate the conda environment (``conda deactivate`` takes no
   environment name)::

       conda deactivate

Each archive listed in ``DOWNLOADS`` is fetched from the 0.1.0 GitHub release
and unzipped into its target folder, relative to the current working
directory.
"""

import os
import zipfile

# Base URL of the GitHub release that hosts every dataset archive.
RELEASE_URL = (
    "https://github.com/dbuscombe-usgs/mlmondays_data_imrecog"
    "/releases/download/0.1.0/"
)

# (archive file name, folder to extract into). Keeping each archive next to
# its destination avoids two parallel lists drifting out of sync.
DOWNLOADS = (
    ("nwpu.zip", "./data"),
    ("tamucc_full_2class.zip", "./data/tamucc"),
    ("tamucc_full_4class.zip", "./data/tamucc"),
    ("tamucc_subset_2class.zip", "./data/tamucc"),
    ("tamucc_subset_3class.zip", "./data/tamucc"),
    ("tamucc_subset_4class.zip", "./data/tamucc"),
)


def release_url(archive_name: str) -> str:
    """Return the download URL of *archive_name* in the 0.1.0 release."""
    return RELEASE_URL + archive_name


def fetch_and_extract(archive_name: str, folder: str) -> None:
    """Download one release archive and unzip it into *folder*.

    Args:
        archive_name: File name of the zip archive in the release.
        folder: Directory the archive contents are extracted into.
    """
    # Imported here rather than at module level so that importing this module
    # (e.g. to inspect DOWNLOADS) does not require TensorFlow to be installed.
    import tensorflow as tf

    # An absolute path is handed to get_file as the file name; the same path
    # is then opened for extraction, so both steps refer to one file.
    # NOTE(review): relies on get_file honouring an absolute ``fname`` as the
    # save location -- confirm against the installed TensorFlow/Keras version.
    archive_path = os.path.join(os.getcwd(), archive_name)

    print("Downloading %s ... " % (archive_path))
    tf.keras.utils.get_file(archive_path, release_url(archive_name))

    print("Unzipping to %s ... " % (folder))
    with zipfile.ZipFile(archive_path, "r") as z_fp:
        z_fp.extractall(folder)


def main() -> None:
    """Create the target folders, then download and unpack every archive."""
    # exist_ok=True lets the script be re-run; plain os.mkdir raised
    # FileExistsError as soon as ``data`` already existed.
    for _, folder in DOWNLOADS:
        os.makedirs(folder, exist_ok=True)

    for archive_name, folder in DOWNLOADS:
        fetch_and_extract(archive_name, folder)


if __name__ == "__main__":
    main()