From 8aa118ec9fb355221b3750e5349180affa0fec1d Mon Sep 17 00:00:00 2001 From: Sarah Date: Wed, 5 Jun 2019 12:45:33 -0400 Subject: [PATCH 1/5] added gz handling to jh_utils.open_4dn_file --- dcicutils/jh_utils.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dcicutils/jh_utils.py b/dcicutils/jh_utils.py index c26def74b..9a1fd5cbb 100644 --- a/dcicutils/jh_utils.py +++ b/dcicutils/jh_utils.py @@ -1,4 +1,5 @@ import functools +import gzip import types import os import sys @@ -247,23 +248,32 @@ def open_4dn_file(obj_id, format=None, local=True): """ file_info = find_valid_file_or_extra_file(obj_id, format) # this will be the base case in the future... + gz = False if not local: # this seems to handle both binary and non-binary files ff_file = use_urllib.urlopen(file_info['full_href']) + if file_info.get('full_href', '').endswith('.gz'): + gz = True else: - ff_file = open(file_info['full_path']) + if file_info.get('full_path', '').endswith('.gz'): + ff_file = gzip.open(file_info['full_path']) + else: + ff_file = open(file_info['full_path']) # see if the file is binary (needs to opened with 'rb' mode) # try to read a line from the file; if it is read, reset with seek() try: ff_file.readline() - except UnicodeDecodeError: + except UnicodeDecodeError as e: ff_file = open(file_info['full_path'], 'rb') else: ff_file.seek(0) + f = gzip.open(ff_file) if gz else ff_file try: - yield ff_file + yield f finally: - ff_file.close() + if gz: + ff_file.close() + f.close() # LASTLY, do setup that requires the above functions to be defined From e0dccc14017676895a1a0675ecea6a79c33d2faa Mon Sep 17 00:00:00 2001 From: Sarah Date: Wed, 5 Jun 2019 14:20:07 -0400 Subject: [PATCH 2/5] removed unused variable --- dcicutils/jh_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/jh_utils.py b/dcicutils/jh_utils.py index 9a1fd5cbb..f05a48aa8 100644 --- a/dcicutils/jh_utils.py +++ b/dcicutils/jh_utils.py @@ -263,7 +263,7 @@ def open_4dn_file(obj_id, format=None, local=True): # try to read a line from the file; if it is read, reset with seek() try: ff_file.readline() - except UnicodeDecodeError as e: + except UnicodeDecodeError: ff_file = open(file_info['full_path'], 'rb') else: ff_file.seek(0) From 3abd22295ab794aaac7894fe4ed18097351035f1 Mon Sep 17 00:00:00 2001 From: Sarah Date: Wed, 5 Jun 2019 16:12:25 -0400 Subject: [PATCH 3/5] fixed indent in jh_utils.open_4dn_file --- dcicutils/jh_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dcicutils/jh_utils.py b/dcicutils/jh_utils.py index f05a48aa8..f879ceb5e 100644 --- a/dcicutils/jh_utils.py +++ b/dcicutils/jh_utils.py @@ -259,14 +259,14 @@ def open_4dn_file(obj_id, format=None, local=True): ff_file = gzip.open(file_info['full_path']) else: ff_file = open(file_info['full_path']) - # see if the file is binary (needs to opened with 'rb' mode) - # try to read a line from the file; if it is read, reset with seek() - try: - ff_file.readline() - except UnicodeDecodeError: - ff_file = open(file_info['full_path'], 'rb') - else: - ff_file.seek(0) + # see if the file is binary (needs to opened with 'rb' mode) + # try to read a line from the file; if it is read, reset with seek() + try: + ff_file.readline() + except UnicodeDecodeError: + ff_file = open(file_info['full_path'], 'rb') + else: + ff_file.seek(0) f = gzip.open(ff_file) if gz else ff_file try: yield f From faac2182971a72ae91dac015b3de34dd0a23f892 Mon Sep 17 00:00:00 2001 From: Sarah Date: Wed, 5 Jun 2019 16:38:56 -0400 Subject: [PATCH 4/5] jh_utils.open_4dn_file now opens gz files in rt mode --- dcicutils/jh_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/jh_utils.py b/dcicutils/jh_utils.py index f879ceb5e..f60cdb20e 100644 --- a/dcicutils/jh_utils.py +++ b/dcicutils/jh_utils.py @@ -267,7 +267,7 @@ def open_4dn_file(obj_id, format=None, local=True): ff_file = open(file_info['full_path'], 'rb') else: ff_file.seek(0) - f = gzip.open(ff_file) if gz else ff_file + f = gzip.open(ff_file, 'rt') if gz else ff_file try: yield f finally: From 46938c8ff4863ce8f0577969bfd8c2a3524ec421 Mon Sep 17 00:00:00 2001 From: Carl Vitzthum Date: Thu, 6 Jun 2019 15:21:50 -0400 Subject: [PATCH 5/5] version bump --- dcicutils/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dcicutils/_version.py b/dcicutils/_version.py index c9a99e5c7..81eb185f1 100644 --- a/dcicutils/_version.py +++ b/dcicutils/_version.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.7.1" +__version__ = "0.7.2"