Skip to content

Commit

Permalink
Merge pull request #24 from opesci/compressionV2
Browse files: browse the repository at this point in the history
Compression
  • Loading branch information
navjotk authored Mar 13, 2019
2 parents 77dad44 + 2a287df commit 86d884e
Show file tree
Hide file tree
Showing 15 changed files with 720 additions and 105 deletions.
24 changes: 5 additions & 19 deletions examples/use_modernised.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,11 @@ def __init__(self, symbols):
raise Exception("Symbols must be a Mapping, for example a \
dictionary.")

def save(self, ptr):
    """Copy the live data of every symbol into the buffer ``ptr``.

    The symbols are written back to back, in the iteration order of
    ``self.symbols``; each one fills ``size`` consecutive entries of
    ``ptr``, which therefore has to support slice assignment.
    """
    offset = 0
    for symbol in self.symbols.values():
        end = offset + symbol.size
        ptr[offset:end] = symbol.data[:]
        offset = end

def load(self, ptr):
    """Overwrite the live data of every symbol with the contents of ``ptr``.

    ``ptr`` is read back to back, in the iteration order of
    ``self.symbols``; each symbol receives the next ``size`` consecutive
    entries, assigned in place into its ``data``.
    """
    offset = 0
    for symbol in self.symbols.values():
        end = offset + symbol.size
        symbol.data[:] = ptr[offset:end]
        offset = end
def get_data_location(self, timestep):
    """Return the ``data`` attribute of every symbol, in mapping order.

    ``timestep`` is accepted but not used by this implementation.
    """
    # Iterate the values view directly; no intermediate list copy is needed.
    return [symbol.data for symbol in self.symbols.values()]

def get_data(self, timestep):
    """Return the ``data`` attribute of every symbol, in mapping order.

    ``timestep`` is accepted but not used by this implementation.
    """
    symbols = self.symbols
    return [symbols[name].data for name in symbols]

@property
def size(self):
Expand Down
106 changes: 106 additions & 0 deletions pyrevolve/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import blosc
import pyzfp
import numpy as np
from contexttimer import Timer
from functools import partial
import pickle


# Default parameters for each built-in compression scheme, keyed by scheme
# name (``None`` means "no compression").
DEFAULTS = {
    None: {},
    'blosc': {'chunk_size': 1000000},
    'zfp': {'tolerance': 0.0000001, 'parallel': True},
}


def init_compression(params):
    """Build the (compressor, decompressor) pair described by ``params``.

    ``params`` is copied first, so the caller's mapping is not modified.
    The ``'scheme'`` entry (default ``None``) selects the implementation:

    * ``'custom'``: the callables are taken from the ``'compressor'`` and
      ``'decompressor'`` entries.
    * anything else: the callables are looked up in the module-level
      ``compressors`` / ``decompressors`` tables, and every entry of
      ``DEFAULTS[scheme]`` missing from the parameters is filled in.

    Both returned callables are ``functools.partial`` objects bound to the
    same remaining-parameters dict as their first argument.
    """
    settings = params.copy()
    scheme = settings.pop('scheme', None)
    if scheme != 'custom':
        compressor = compressors[scheme]
        decompressor = decompressors[scheme]
        for key, value in DEFAULTS[scheme].items():
            settings.setdefault(key, value)
    else:
        compressor = settings.pop('compressor', None)
        decompressor = settings.pop('decompressor', None)
    return partial(compressor, settings), partial(decompressor, settings)


def no_compression_in(params, indata):
    """Wrap ``indata`` in a ``CompressedObject`` without compressing it.

    The payload is a ``memoryview`` over the array's raw bytes; the shape
    and dtype are recorded alongside it. ``params`` is ignored.
    """
    raw = memoryview(indata.tobytes())
    return CompressedObject(raw, shape=indata.shape, dtype=indata.dtype)


def no_compression_out(params, indata):
    """Rebuild an array from an uncompressed payload.

    Reads ``indata.data`` as a flat buffer of ``indata.dtype`` and reshapes
    it to ``indata.shape``. ``params`` is ignored.
    """
    flat = np.frombuffer(indata.data, dtype=indata.dtype)
    return flat.reshape(indata.shape)


def blosc_compress(params, indata):
    """Compress ``indata`` with blosc, one fixed-size chunk at a time.

    The array's raw bytes are split into pieces of ``params['chunk_size']``
    bytes, each piece is compressed independently, and the results are
    concatenated. The compressed length of every chunk is stored under
    ``'chunks'`` in the metadata, next to ``'shape'`` and ``'dtype'``, so
    the stream can be split again when decompressing.

    Returns a ``CompressedObject`` holding the concatenated bytes.
    """
    # ``tobytes`` replaces the deprecated ``tostring`` alias (gone in
    # NumPy 2.0) and matches what ``no_compression_in`` already uses.
    raw = indata.tobytes()
    chunk_size = params.get('chunk_size')
    compressed_chunks = [blosc.compress(raw[start:start + chunk_size])
                         for start in range(0, len(raw), chunk_size)]
    metadata = {'shape': indata.shape, 'dtype': indata.dtype,
                'chunks': [len(chunk) for chunk in compressed_chunks]}
    # A single join avoids re-copying the growing result for every chunk.
    return CompressedObject(data=b''.join(compressed_chunks),
                            metadata=metadata)


def blosc_decompress(params, indata):
    """Rebuild an array from a chunked blosc payload.

    ``indata.metadata['chunks']`` lists the compressed length of each
    chunk, in order. Each chunk is cut out of ``indata.data`` and
    decompressed on its own; the joined bytes are then read as
    ``indata.dtype`` and reshaped to ``indata.shape``. ``params`` is
    ignored.
    """
    compressed = indata.data
    pieces = []
    offset = 0
    for length in indata.metadata['chunks']:
        pieces.append(blosc.decompress(compressed[offset:offset + length]))
        offset += length
    # A single join avoids re-copying the growing result for every chunk.
    flat = np.frombuffer(b''.join(pieces), dtype=indata.dtype)
    return flat.reshape(indata.shape)


class CompressedObject(object):
    """Payload plus the metadata needed to turn it back into an array.

    Construct it either with explicit ``shape`` and ``dtype``, or with a
    ``metadata`` mapping that contains at least ``'shape'`` and ``'dtype'``
    -- not both (checked with assertions). A pickled copy of the metadata
    is kept in ``pickled_metadata``.
    """

    def __init__(self, data, shape=None, dtype=None, metadata=None):
        if metadata is None:
            metadata = {'shape': shape, 'dtype': dtype}
        else:
            # shape/dtype may only come from one of the two sources.
            assert shape is None and dtype is None
            assert 'shape' in metadata and 'dtype' in metadata
            shape, dtype = metadata['shape'], metadata['dtype']
        self.data = data
        self.shape = shape
        self.dtype = dtype
        self.metadata = metadata
        self.pickled_metadata = pickle.dumps(metadata)


def zfp_compress(params, indata):
    """Compress ``indata`` with pyzfp and wrap the result.

    Every entry of ``params`` is forwarded to ``pyzfp.compress`` as a
    keyword argument. The returned ``CompressedObject`` holds a
    ``memoryview`` of the compressed output plus the array's shape and dtype.
    """
    payload = memoryview(pyzfp.compress(indata, **params))
    return CompressedObject(payload, shape=indata.shape, dtype=indata.dtype)


def zfp_decompress(params, indata):
    """Decompress a ``CompressedObject`` with pyzfp.

    The payload, shape and dtype stored on ``indata`` are handed to
    ``pyzfp.decompress``, followed by every entry of ``params`` as a
    keyword argument.
    """
    assert isinstance(indata, CompressedObject)
    payload, shape, dtype = indata.data, indata.shape, indata.dtype
    return pyzfp.decompress(payload, shape, dtype, **params)


# Dispatch tables from scheme name to implementation; ``None`` selects the
# pass-through (no compression) pair.
compressors = {
    None: no_compression_in,
    'blosc': blosc_compress,
    'zfp': zfp_compress,
}
decompressors = {
    None: no_compression_out,
    'blosc': blosc_decompress,
    'zfp': zfp_decompress,
}
allowed_names = [None, 'blosc', 'zfp']
6 changes: 5 additions & 1 deletion pyrevolve/crevolve.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ cimport revolve_c
from enum import Enum
import warnings

from tools import OutputGrabber


class RevolveError(Exception):
    """Dedicated exception type of this module; adds nothing to Exception."""
# TODO: the hardcoded limits really should be removed in a future version. This should be as easy as replacing the arrays in the C++ code with an std::vector.
Expand Down Expand Up @@ -78,7 +81,8 @@ cdef class CRevolve(object):

def revolve(self):
cdef revolve_c.CACTION action
action = revolve_c.revolve(self.__r)
with OutputGrabber() as og:
action = revolve_c.revolve(self.__r)
if(action == revolve_c.CACTION_ADVANCE):
retAction = Action.advance
elif(action == revolve_c.CACTION_TAKESHOT):
Expand Down
19 changes: 19 additions & 0 deletions pyrevolve/custom_pickle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pickle
import numpy as np


def dumps(data):
    """Pickle ``data``, storing NumPy arrays as a plain tagged dict.

    An ``ndarray`` is replaced by a dict holding its raw bytes, shape and
    dtype, tagged with ``'creator': 'custom_pickle'``. Any other object is
    pickled unchanged.
    """
    if not isinstance(data, np.ndarray):
        return pickle.dumps(data)
    record = {'data': data.tobytes(), 'shape': data.shape,
              'dtype': data.dtype, 'creator': 'custom_pickle'}
    return pickle.dumps(record)


def loads(data):
    """Unpickle ``data``, rebuilding NumPy arrays from tagged dicts.

    If the unpickled object is a dict tagged with
    ``'creator': 'custom_pickle'``, its ``'data'`` bytes are read with the
    stored ``'dtype'`` and reshaped to the stored ``'shape'``. Any other
    object is returned as unpickled.

    NOTE: this calls ``pickle.loads``, so it must only be given data from a
    trusted source.
    """
    outdata = pickle.loads(data)
    if isinstance(outdata, dict) \
            and outdata.get('creator') == 'custom_pickle':
        # Keep the dict under its own name until the shape has been read:
        # rebinding it to the flat array first would lose the metadata.
        record = outdata
        flat = np.frombuffer(record['data'], dtype=record['dtype'])
        outdata = flat.reshape(record['shape'])
    return outdata
13 changes: 13 additions & 0 deletions pyrevolve/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import logging

# Package logger, stream handler and message format. Both the logger and
# the handler are set to DEBUG.
logger = logging.getLogger("pyRevolve")
ch = logging.StreamHandler()
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Configure the handler first, then attach it to the logger.
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

logger.setLevel(logging.DEBUG)
logger.addHandler(ch)
61 changes: 61 additions & 0 deletions pyrevolve/profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from timeit import default_timer


class Timer(object):
    """Context manager that times its body and reports to a profiler.

    On exit the elapsed time, measured with ``timeit.default_timer``, is
    stored on the instance as ``elapsed_secs`` (seconds) and ``elapsed``
    (milliseconds). The millisecond value is then passed to
    ``profiler.increment(section, action, elapsed)``.
    """

    def __init__(self, profiler, section, action):
        self.profiler, self.section, self.action = profiler, section, action
        self.timer = default_timer

    def __enter__(self):
        self.start = self.timer()
        return self

    def __exit__(self, *args):
        seconds = self.timer() - self.start
        self.elapsed_secs = seconds
        self.elapsed = seconds * 1000  # millisecs
        self.profiler.increment(self.section, self.action, self.elapsed)


class Profiler(object):
    """Accumulates elapsed time and call counts per (section, action).

    ``timings`` and ``counts`` are two-level dicts indexed first by section
    and then by action.
    """

    def __init__(self):
        self.timings = {}
        self.counts = {}

    def get_timer(self, section, action):
        """Return a ``Timer`` that reports to this profiler."""
        return Timer(self, section, action)

    def increment(self, section, action, elapsed):
        """Add ``elapsed`` to the action's total and bump its count by one."""
        # Warning: Not thread safe
        action_totals = self.timings.setdefault(section, {})
        action_totals[action] = action_totals.get(action, 0) + elapsed

        action_counts = self.counts.setdefault(section, {})
        action_counts[action] = action_counts.get(action, 0) + 1

    def summary(self):
        """Return a multi-line report of the total time and count per action."""
        border = '****************'
        lines = [border]
        for section, section_timings in self.timings.items():
            lines.append('In section %s:' % section)
            for action, action_time in section_timings.items():
                lines.append('\tAction %s: %f (%d)'
                             % (action, action_time,
                                self.counts[section][action]))
        lines.append(border)
        return '\n'.join(lines)

    def get_dict(self):
        """Return all results flattened into a single dict.

        Keys are ``<section>_<action>_timing`` and
        ``<section>_<action>_counts``.
        """
        results = {}
        tables = (('%s_%s_timing', self.timings),
                  ('%s_%s_counts', self.counts))
        for template, table in tables:
            for s_n, s_dict in table.items():
                for a_n, value in s_dict.items():
                    results[template % (s_n, a_n)] = value
        return results
Loading

0 comments on commit 86d884e

Please sign in to comment.