From cc35fd2a4fe5e807497bcb96f7a1b3db168a7666 Mon Sep 17 00:00:00 2001
From: dbuscombe-usgs <daniel.buscombe@nau.edu>
Date: Tue, 22 Sep 2020 11:51:56 -0700
Subject: [PATCH] 9/22/20

---
 README.md        | 15 ++++++++++++++-
 download_data.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 download_data.py

diff --git a/README.md b/README.md
index dec7347..32a3d87 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,15 @@
 # mlmondays_data_imrecog
-Datasets for the 'image recognition' ML-Mondays lessons 
+Datasets for the 'image recognition' ML-Mondays lessons
+
+
+Activate the `mlmondays` conda environment
+
+`conda activate mlmondays`
+
+Run the provided script:
+
+`python download_data.py`
+
+Deactivate the `mlmondays` conda environment
+
+`conda deactivate`
diff --git a/download_data.py b/download_data.py
new file mode 100644
index 0000000..6d8ba1a
--- /dev/null
+++ b/download_data.py
@@ -0,0 +1,37 @@
+
+import os, zipfile
+import tensorflow as tf
+
+os.mkdir('data')
+os.mkdir('data/tamucc')
+
+
+folders_to_extract_to = [
+'./data',
+'./data/tamucc',
+'./data/tamucc',
+'./data/tamucc',
+'./data/tamucc',
+'./data/tamucc',
+]
+
+files_to_download = [
+'nwpu.zip',
+'tamucc_full_2class.zip',
+'tamucc_full_4class.zip',
+'tamucc_subset_2class.zip',
+'tamucc_subset_3class.zip',
+'tamucc_subset_4class.zip',
+]
+
+
+for k in range(len(files_to_download)):
+    file = files_to_download[k]
+    folder = folders_to_extract_to[k]
+    url = "https://github.com/dbuscombe-usgs/mlmondays_data_imrecog/releases/download/0.1.0/"+file
+    filename = os.path.join(os.getcwd(), file)
+    print("Downloading %s ... " % (filename))
+    tf.keras.utils.get_file(filename, url)
+    print("Unzipping to %s ... " % (folder))
+    with zipfile.ZipFile(file, "r") as z_fp:
+        z_fp.extractall("./"+folder)