-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from opesci/compressionV2
Compression
- Loading branch information
Showing
15 changed files
with
720 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import blosc | ||
import pyzfp | ||
import numpy as np | ||
from contexttimer import Timer | ||
from functools import partial | ||
import pickle | ||
|
||
|
||
# Per-scheme default parameters. init_compression() copies any entry that
# the caller did not supply into the parameter dict handed to the
# compressor/decompressor of that scheme.
DEFAULTS = {
    None: {},
    'blosc': {'chunk_size': 1000000},
    'zfp': {'tolerance': 0.0000001, 'parallel': True},
}
|
||
|
||
def init_compression(params):
    """Build a (compressor, decompressor) pair from a parameter dict.

    ``params`` is copied, so the caller's dict is left untouched. The
    ``'scheme'`` entry (default ``None``) selects the implementation:

    * ``'custom'``: the callables are taken from the ``'compressor'`` and
      ``'decompressor'`` entries of ``params``.
    * anything else: the callables are looked up in the module-level
      ``compressors``/``decompressors`` tables, and every entry of
      ``DEFAULTS[scheme]`` that the caller did not provide is filled in.

    The remaining entries are bound as the first positional argument of
    both callables, so each returned function takes only the data.

    Returns:
        Tuple ``(compress, decompress)`` of ``functools.partial`` objects.

    Raises:
        KeyError: if ``scheme`` is neither ``'custom'`` nor a known scheme.
    """
    options = params.copy()
    scheme = options.pop('scheme', None)

    if scheme == 'custom':
        compressor = options.pop('compressor', None)
        decompressor = options.pop('decompressor', None)
    else:
        compressor = compressors[scheme]
        decompressor = decompressors[scheme]
        # Caller-supplied values win; defaults only fill the gaps.
        for key, value in DEFAULTS[scheme].items():
            options.setdefault(key, value)

    # Both partials share the same options dict.
    return partial(compressor, options), partial(decompressor, options)
|
||
|
||
def no_compression_in(params, indata):
    """Wrap an array in a CompressedObject without compressing it.

    The array's bytes (``indata.tobytes()``) are exposed through a
    ``memoryview``; shape and dtype are recorded alongside. ``params`` is
    accepted for signature compatibility with the other compressors and is
    not used.
    """
    raw_view = memoryview(indata.tobytes())
    return CompressedObject(raw_view, shape=indata.shape, dtype=indata.dtype)
|
||
|
||
def no_compression_out(params, indata):
    """Rebuild an array from an uncompressed CompressedObject-like value.

    Reads ``indata.data`` as a flat buffer of ``indata.dtype`` items and
    reshapes it to ``indata.shape``. ``params`` is not used.
    """
    flat = np.frombuffer(indata.data, dtype=indata.dtype)
    return flat.reshape(indata.shape)
|
||
|
||
def blosc_compress(params, indata):
    """Compress an array with blosc, one fixed-size chunk at a time.

    The array is serialised with ``tobytes()``, cut into slices of
    ``params['chunk_size']`` bytes (the last slice may be shorter), and
    each slice is compressed independently with ``blosc.compress``.

    Args:
        params: dict providing ``'chunk_size'`` (number of raw bytes per
            chunk).
        indata: array to compress; must provide ``tobytes()``, ``shape``
            and ``dtype``.

    Returns:
        CompressedObject whose ``data`` is the concatenation of all
        compressed chunks and whose metadata holds ``'shape'``,
        ``'dtype'`` and ``'chunks'`` (the compressed length of each chunk,
        in order), which is what is needed to split the stream again.
    """
    # tobytes() replaces the deprecated ndarray.tostring() alias.
    raw = indata.tobytes()
    chunk_size = params.get('chunk_size')

    compressed_chunks = [blosc.compress(raw[start:start + chunk_size])
                         for start in range(0, len(raw), chunk_size)]

    metadata = {'shape': indata.shape, 'dtype': indata.dtype,
                'chunks': [len(chunk) for chunk in compressed_chunks]}
    # A single join avoids re-copying the growing buffer for every chunk.
    return CompressedObject(data=b''.join(compressed_chunks),
                            metadata=metadata)
|
||
|
||
def blosc_decompress(params, indata):
    """Decompress a chunked blosc stream back into an array.

    ``indata.metadata['chunks']`` lists the compressed length of each
    chunk in order; the stream in ``indata.data`` is split at those
    offsets, every piece is passed to ``blosc.decompress``, and the
    results are concatenated.

    Args:
        params: unused; present for signature compatibility with the
            other decompressors.
        indata: object providing ``data``, ``metadata``, ``dtype`` and
            ``shape``.

    Returns:
        Array of ``indata.dtype`` reshaped to ``indata.shape``, built with
        ``np.frombuffer`` over the decompressed bytes.
    """
    compressed = indata.data
    pieces = []
    offset = 0
    for size in indata.metadata['chunks']:
        pieces.append(blosc.decompress(compressed[offset:offset + size]))
        offset += size

    # A single join avoids re-copying the growing buffer for every chunk.
    decompressed = b''.join(pieces)
    return np.frombuffer(decompressed,
                         dtype=indata.dtype).reshape(indata.shape)
|
||
|
||
class CompressedObject(object):
    """Container pairing a data buffer with the metadata describing it.

    Either pass ``shape`` and ``dtype`` directly, or pass a ``metadata``
    dict that contains at least the keys ``'shape'`` and ``'dtype'``;
    combining ``metadata`` with ``shape``/``dtype`` is rejected by an
    assertion.

    Attributes set on the instance: ``data``, ``shape``, ``dtype``,
    ``metadata`` and ``pickled_metadata`` (``pickle.dumps`` of the
    metadata dict, computed once at construction).
    """

    def __init__(self, data, shape=None, dtype=None, metadata=None):
        # The two ways of describing the payload are mutually exclusive.
        assert metadata is None or (shape is None and dtype is None)

        if metadata is None:
            metadata = {'shape': shape, 'dtype': dtype}
        else:
            assert 'shape' in metadata and 'dtype' in metadata
            shape, dtype = metadata['shape'], metadata['dtype']

        self.data = data
        self.shape = shape
        self.dtype = dtype
        self.metadata = metadata
        self.pickled_metadata = pickle.dumps(metadata)
|
||
|
||
def zfp_compress(params, indata):
    """Compress an array with pyzfp and wrap the result.

    Every entry of ``params`` is forwarded to ``pyzfp.compress`` as a
    keyword argument. The compressed buffer is exposed through a
    ``memoryview`` and stored together with the input's shape and dtype.
    """
    payload = pyzfp.compress(indata, **params)
    return CompressedObject(memoryview(payload),
                            shape=indata.shape, dtype=indata.dtype)
|
||
|
||
def zfp_decompress(params, indata):
    """Decompress a CompressedObject with pyzfp.

    ``indata`` must be a CompressedObject (checked by assertion). Its
    buffer, shape and dtype are passed positionally to
    ``pyzfp.decompress``; every entry of ``params`` is forwarded as a
    keyword argument.
    """
    assert isinstance(indata, CompressedObject)
    buffer, shape, dtype = indata.data, indata.shape, indata.dtype
    return pyzfp.decompress(buffer, shape, dtype, **params)
|
||
|
||
# Dispatch tables used by init_compression(): scheme name -> callable.
# The key None selects the pass-through (no compression) implementation.
compressors = {
    None: no_compression_in,
    'blosc': blosc_compress,
    'zfp': zfp_compress,
}
decompressors = {
    None: no_compression_out,
    'blosc': blosc_decompress,
    'zfp': zfp_decompress,
}
# Built-in scheme names.
allowed_names = [None, 'blosc', 'zfp']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import pickle | ||
import numpy as np | ||
|
||
|
||
def dumps(data):
    """Pickle ``data``, giving NumPy arrays a raw-bytes representation.

    An ``np.ndarray`` is first turned into a plain dict holding its bytes
    (``tobytes()``), shape and dtype, tagged with
    ``'creator': 'custom_pickle'`` so that ``loads`` can recognise it.
    Any other object is pickled unchanged.
    """
    if not isinstance(data, np.ndarray):
        return pickle.dumps(data)

    wrapped = {'data': data.tobytes(), 'shape': data.shape,
               'dtype': data.dtype, 'creator': 'custom_pickle'}
    return pickle.dumps(wrapped)
|
||
|
||
def loads(data):
    """Unpickle ``data``, rebuilding arrays written by ``dumps``.

    If the unpickled object is a dict tagged with
    ``'creator': 'custom_pickle'``, it is converted back into an array
    from its ``'data'`` bytes, ``'dtype'`` and ``'shape'`` entries. Any
    other object is returned exactly as unpickled.

    Note: this calls ``pickle.loads`` and must therefore only be used on
    data from a trusted source.
    """
    outdata = pickle.loads(data)
    if isinstance(outdata, dict) \
       and outdata.get('creator') == 'custom_pickle':
        # Keep the dict under its own name: the shape must still be read
        # from it after the flat array has been created.
        payload = outdata
        flat = np.frombuffer(payload['data'], dtype=payload['dtype'])
        outdata = flat.reshape(payload['shape'])
    return outdata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import logging | ||
|
||
# Package-wide logger: everything from DEBUG upwards is passed to a stream
# handler, formatted with timestamp, logger name and level.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Stream handler that carries the formatter above.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

logger = logging.getLogger("pyRevolve")
logger.setLevel(logging.DEBUG)
logger.addHandler(ch)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from timeit import default_timer | ||
|
||
|
||
class Timer(object):
    """Context manager that reports its wall-clock duration to a profiler.

    On exit the elapsed time is stored on the instance as ``elapsed_secs``
    (seconds) and ``elapsed`` (milliseconds), and the millisecond value is
    passed to ``profiler.increment(section, action, elapsed)``. This also
    happens when the managed block raises; the exception is not
    suppressed.
    """

    def __init__(self, profiler, section, action):
        self.profiler = profiler
        self.section = section
        self.action = action
        self.timer = default_timer

    def __enter__(self):
        self.start = self.timer()
        return self

    def __exit__(self, *args):
        self.elapsed_secs = self.timer() - self.start
        self.elapsed = self.elapsed_secs * 1000  # millisecs
        self.profiler.increment(self.section, self.action, self.elapsed)
|
||
|
||
class Profiler(object):
    """Accumulates elapsed time and call counts per (section, action).

    ``timings`` and ``counts`` are nested dicts of the form
    ``{section: {action: value}}``.
    """

    def __init__(self):
        self.timings = {}
        self.counts = {}

    def get_timer(self, section, action):
        """Return a Timer that reports to this profiler on exit."""
        return Timer(self, section, action)

    def increment(self, section, action, elapsed):
        """Add ``elapsed`` to the running total and bump the call count."""
        # Warning: Not thread safe
        timings = self.timings.setdefault(section, {})
        timings[action] = timings.get(action, 0) + elapsed

        counts = self.counts.setdefault(section, {})
        counts[action] = counts.get(action, 0) + 1

    def summary(self):
        """Return a multi-line, human-readable report of all timings."""
        parts = ['****************']
        for section, section_timings in self.timings.items():
            parts.append('\nIn section %s:' % section)
            for action, action_time in section_timings.items():
                parts.append('\n\tAction %s: %f (%d)'
                             % (action, action_time,
                                self.counts[section][action]))
        parts.append('\n****************')
        return ''.join(parts)

    def get_dict(self):
        """Return a flat dict of the recorded values.

        Keys are ``'<section>_<action>_timing'`` and
        ``'<section>_<action>_counts'``.
        """
        results = {'%s_%s_timing' % (section, action): total
                   for section, actions in self.timings.items()
                   for action, total in actions.items()}
        results.update({'%s_%s_counts' % (section, action): count
                        for section, actions in self.counts.items()
                        for action, count in actions.items()})
        return results
Oops, something went wrong.