Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: unzip support for download #209

Merged
merged 12 commits into from
Oct 31, 2023
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/).

## [0.X.X] - 2023-XX-XX
## [0.0.6] - 2023-XX-XX
* New Instruments
* MAVEN mag
* MAVEN SEP
* MAVEN in situ key parameters
* REACH Dosimeter
* New Features
* Allow files to be unzipped after download
* Bug Fixes
* Fix general clean routine to skip transformation matrices
* New window needs to be integer for calculate_imf_steadiness
Expand Down
69 changes: 62 additions & 7 deletions pysatNASA/instruments/methods/cdaweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
import os
import pandas as pds
import requests
import tempfile
from time import sleep
import xarray as xr
import zipfile

from bs4 import BeautifulSoup
from cdasws import CdasWs
Expand Down Expand Up @@ -471,7 +473,6 @@ def load_xarray(fnames, tag='', inst_id='',
return data, meta


# TODO(#103): Include support to unzip / untar files after download.
def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
remote_url='https://cdaweb.gsfc.nasa.gov'):
"""Download NASA CDAWeb data.
Expand Down Expand Up @@ -526,6 +527,13 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
start=date_array[0],
stop=date_array[-1])

# Create temporary directory if files need to be unzipped.
if 'zip_method' in inst_dict.keys():
zip_method = inst_dict['zip_method']
temp_dir = tempfile.TemporaryDirectory()
else:
zip_method = None

# Download only requested files that exist remotely
for date, fname in remote_files.items():
# Format files for specific dates and download location
Expand All @@ -548,18 +556,19 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
formatted_remote_dir.strip('/'),
fname))

saved_local_fname = os.path.join(data_path, fname)

# Perform download
logger.info(' '.join(('Attempting to download file for',
date.strftime('%d %B %Y'))))
try:
with requests.get(remote_path) as req:
if req.status_code != 404:
with open(saved_local_fname, 'wb') as open_f:
open_f.write(req.content)
logger.info('Successfully downloaded {:}.'.format(
saved_local_fname))
if zip_method:
get_file(req.content, data_path, fname,
temp_path=temp_dir.name, zip_method=zip_method)
else:
get_file(req.content, data_path, fname)
logger.info(''.join(('Successfully downloaded ',
fname, '.')))
else:
logger.info(' '.join(('File not available for',
date.strftime('%d %B %Y'))))
Expand All @@ -568,6 +577,52 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
date.strftime('%d %B %Y'))))
# Pause to avoid excessive pings to server
sleep(0.2)

if zip_method:
# Cleanup temporary directory
temp_dir.cleanup()

return


def get_file(remote_file, data_path, fname, temp_path=None, zip_method=None):
    """Retrieve a file, unzipping if necessary.

    Parameters
    ----------
    remote_file : bytes
        File content retrieved via requests.
    data_path : str
        Path to pysat archival directory.
    fname : str
        Name of file on the remote server.
    temp_path : str or NoneType
        Path to a temporary directory where a zipped file is staged before
        extraction. Only used if `zip_method` is set. If None while
        `zip_method` is set, a scratch directory is created for this call
        and removed afterwards. (default=None)
    zip_method : str or NoneType
        The method used to zip the file. Supports 'zip' and None.
        If None, downloads files directly. (default=None)

    Note
    ----
    If `zip_method` is set but not recognized, the content is written only to
    the temporary location and a warning is logged; nothing is placed in
    `data_path`.

    """

    if zip_method and temp_path is None:
        # Without a staging directory `os.path.join(None, fname)` would raise
        # a TypeError, so supply a scratch directory that lives only for the
        # duration of this call.
        with tempfile.TemporaryDirectory() as scratch_dir:
            get_file(remote_file, data_path, fname, temp_path=scratch_dir,
                     zip_method=zip_method)
        return

    if zip_method:
        # Stage the archive in the temporary location so that only the
        # extracted contents end up in the pysat data directory.
        dl_fname = os.path.join(temp_path, fname)
    else:
        # Use the pysat data directory.
        dl_fname = os.path.join(data_path, fname)

    # Write the downloaded content to the chosen destination.
    with open(dl_fname, 'wb') as open_f:
        open_f.write(remote_file)

    # Unzip the staged archive into the pysat data directory.
    if zip_method == 'zip':
        with zipfile.ZipFile(dl_fname, 'r') as open_zip:
            open_zip.extractall(data_path)

    elif zip_method is not None:
        logger.warning('{:} is not a recognized zip method'.format(zip_method))

    return


Expand Down