From 8625054e45163f3438282f96fddb77f776fb7d31 Mon Sep 17 00:00:00 2001 From: davidmunoznovoa Date: Fri, 22 Mar 2024 14:54:58 +0100 Subject: [PATCH 1/2] Refact of A5D and B5D --- mesures/a5d.py | 17 ++++--- mesures/b5d.py | 108 ++------------------------------------------- mesures/headers.py | 16 +------ 3 files changed, 15 insertions(+), 126 deletions(-) diff --git a/mesures/a5d.py b/mesures/a5d.py index 9fc4364..3c4384d 100644 --- a/mesures/a5d.py +++ b/mesures/a5d.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from mesures.dates import * -from mesures.headers import A5D_HEADER as columns +from mesures.headers import A5D_HEADER as COLUMNS from mesures.parsers.dummy_data import DummyCurve from mesures.utils import check_line_terminator_param import os @@ -17,6 +17,7 @@ def __init__(self, data, distributor=None, comer=None, compression='bz2', versio """ if isinstance(data, list): data = DummyCurve(data).curve_data + self.columns = COLUMNS self.file = self.reader(data) self.generation_date = datetime.now() self.prefix = 'A5D' @@ -80,7 +81,7 @@ def number_of_cups(self): def reader(self, filepath): if isinstance(filepath, str): df = pd.read_csv( - filepath, sep=';', names=columns + filepath, sep=';', names=self.columns ) elif isinstance(filepath, list): df = pd.DataFrame(data=filepath) @@ -93,7 +94,7 @@ def reader(self, filepath): df['timestamp'] = df['timestamp'].apply(lambda x: x.strftime('%Y/%m/%d %H:%M')) for key in ['r1', 'r2', 'r3', 'r4', 'ae', 'method', 'firmeza']: df[key] = '' - df = df[columns] + df = df[self.columns] return df def writer(self): @@ -101,11 +102,16 @@ def writer(self): A5D contains a hourly raw curve :return: file path """ - file_path = os.path.join('/tmp', self.filename) + existing_files = os.listdir('/tmp') + if existing_files: + versions = [int(f.split('.')[1]) for f in existing_files if self.filename.split('.')[0] in f] + if versions: + self.version = max(versions) + 1 + file_path = os.path.join('/tmp', self.filename) kwargs = {'sep': ';', 'header': False, - 'columns': columns, + 'columns': self.columns, 'index': False, check_line_terminator_param(): ';\n' } @@ -113,5 +119,4 @@ def writer(self): kwargs.update({'compression': self.default_compression}) self.file.to_csv(file_path, **kwargs) - return file_path diff --git a/mesures/b5d.py b/mesures/b5d.py index 41b5c78..2b9eb2a 100644 --- a/mesures/b5d.py +++ b/mesures/b5d.py @@ -1,13 +1,8 @@ # -*- coding: utf-8 -*- -from mesures.dates import * -from mesures.headers import B5D_HEADER as columns -from mesures.parsers.dummy_data import DummyCurve -from mesures.utils import check_line_terminator_param -import os -import pandas as pd +from mesures.a5d import A5D -class B5D(): +class B5D(A5D): def __init__(self, data, distributor=None, comer=None, compression='bz2', version=0): """ :param data: list of dicts or absolute file_path @@ -15,102 +10,5 @@ def __init__(self, data, distributor=None, comer=None, compression='bz2', versio :param comer: str comer REE code :param compression: 'bz2', 'gz'... OR False otherwise """ - if isinstance(data, list): - data = DummyCurve(data).curve_data - self.file = self.reader(data) - self.generation_date = datetime.now() + super(B5D, self).__init__(data, distributor=distributor, comer=comer, compression=compression, version=version) self.prefix = 'B5D' - self.default_compression = compression - self.version = version - self.distributor = distributor - self.comer = comer - - def __repr__(self): - return "{}: {} Wh".format(self.filename, self.total) - - def __gt__(self, other): - return self.total > other.total - - def __lt__(self, other): - return self.total < other.total - - def __eq__(self, other): - return self.file.equals(other.file) - - def __add__(self, other): - return self.file.append(other.file) - - def __len__(self): - return len(self.file) - - @property - def filename(self): - if self.default_compression: - return "{prefix}_{distributor}_{comer}_{timestamp}.{version}.{compression}".format( - prefix=self.prefix, distributor=self.distributor, comer=self.comer, - timestamp=self.generation_date.strftime('%Y%m%d'), version=self.version, - compression=self.default_compression - ) - else: - return "{prefix}_{distributor}_{comer}_{timestamp}.{version}".format( - prefix=self.prefix, distributor=self.distributor, comer=self.comer, - timestamp=self.generation_date.strftime('%Y%m%d'), version=self.version - ) - - @property - def total(self): - return int(self.file['ae'].sum()) - - @property - def ai(self): - return int(self.file['ai'].sum()) - - @property - def ae(self): - return int(self.file['ae'].sum()) - - @property - def cups(self): - return list(set(self.file['cups'])) - - @property - def number_of_cups(self): - return len(list(set(self.file['cups']))) - - def reader(self, filepath): - if isinstance(filepath, str): - df = pd.read_csv( - filepath, sep=';', names=columns - ) - elif isinstance(filepath, list): - df = pd.DataFrame(data=filepath) - else: - raise Exception("Filepath must be an str or a list") - - df = df.groupby(['cups', 'timestamp', 'season', 'factura']).aggregate( - {'ai': 'sum', 'ae': 'sum'} - ).reset_index() - df['timestamp'] = df['timestamp'].apply(lambda x: x.strftime('%Y/%m/%d %H:%M')) - for key in ['r1', 'r2', 'r3', 'r4', 'method', 'firmeza']: - df[key] = '' - df['ai'] = 0 - df = df[columns] - return df - - def writer(self): - """ - B5D contains a hourly raw curve - :return: file path - """ - file_path = os.path.join('/tmp', self.filename) - kwargs = {'sep': ';', - 'header': False, - 'columns': columns, - 'index': False, - check_line_terminator_param(): ';\n' - } - if self.default_compression: - kwargs.update({'compression': self.default_compression}) - - self.file.to_csv(file_path, **kwargs) - return file_path diff --git a/mesures/headers.py b/mesures/headers.py index dbace64..96015ff 100644 --- a/mesures/headers.py +++ b/mesures/headers.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# Valid for A5D and B5D A5D_HEADER = [ 'cups', 'timestamp', @@ -56,21 +57,6 @@ 'data_baixa' ] -B5D_HEADER = [ - 'cups', - 'timestamp', - 'season', - 'ai', - 'ae', - 'r1', - 'r2', - 'r3', - 'r4', - 'method', - 'firmeza', - 'factura' -] - CILCAU_HEADER = [ 'cau', 'cil', From 8d5d723bd5cf91abc01428016442c98ba5d6d1e6 Mon Sep 17 00:00:00 2001 From: davidmunoznovoa Date: Fri, 22 Mar 2024 15:25:12 +0100 Subject: [PATCH 2/2] Update tests --- mesures/a5d.py | 2 +- spec/generation_files_spec.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mesures/a5d.py b/mesures/a5d.py index 3c4384d..a909997 100644 --- a/mesures/a5d.py +++ b/mesures/a5d.py @@ -7,7 +7,7 @@ import pandas as pd -class A5D(): +class A5D(object): def __init__(self, data, distributor=None, comer=None, compression='bz2', version=0): """ :param data: list of dicts or absolute file_path diff --git a/spec/generation_files_spec.py b/spec/generation_files_spec.py index 5eb1458..69cefa8 100644 --- a/spec/generation_files_spec.py +++ b/spec/generation_files_spec.py @@ -864,12 +864,12 @@ def get_sample_cups45_data(): f1 = f.writer() assert isinstance(f1, str) assert 'bz2' not in f1 - assert f1.endswith('.0') + # Version control causes file to be version 1 instead of 0 + assert f1.endswith('.1') with description('A B5D'): with it('bz2 as a default compression'): - f = B5D([{'cups': 'XDS', 'timestamp': datetime.now(), 'season': 1, 'ai': 0, 'factura': 123}], - distributor='1234', comer='1235', compression='bz2') + f = B5D([{'cups': 'XDS', 'timestamp': datetime.now(), 'season': 1, 'ai': 0, 'factura': 123}], compression='bz2') assert isinstance(f.filename, str) assert '.bz2' in f.filename assert f.filename.endswith('.bz2') @@ -885,7 +885,8 @@ def get_sample_cups45_data(): f1 = f.writer() assert isinstance(f1, str) assert 'bz2' not in f1 - assert f1.endswith('.0') + # Version control causes file to be version 1 instead of 0 + assert f1.endswith('.1') with description('An F3'): with it('is instance of F3 Class'):