# proxy.py
"""here goes the proxy"""
import os
import datetime as dt
import pandas as pd
from parameter import Parameter
class DataMerger():
    '''
    Class that handles the merging of the different Dataframes.

    For every folder listed in the parameters, the CSV files found in
    ./res/<folder>/processed are left-joined on their 'Date' column onto
    the frame returned by get_date_frame.
    '''

    def __init__(self):
        '''Constructor of DataMerger'''
        self.params = Parameter.get_instance()
        # one path per configured folder: ./res/<folder_name>/processed
        path_to_res = os.path.join('.', 'res')
        self.path_to_processed = [
            os.path.join(path_to_res, folder, 'processed')
            for folder in self.params.folders
        ]
        # init other attributes
        self.frame = pd.DataFrame()

    def get_all_data(self, save_data=True):
        '''
        Method that merges all dataframes from res/<folder>/processed/*.csv
        @param save_data : Boolean value set True if you want to save the frame
                           to ./res/all_raw.csv
        @return : the merged frame (also stored in self.frame)
        '''
        start = self.params.start_date_data
        end = self.params.end_date_data
        self.frame = get_date_frame(start, end)
        for folder in self.path_to_processed:
            try:
                # os.listdir gives no ordering guarantee; sorting keeps the
                # column order of the merged frame reproducible across runs.
                file_names = sorted(os.listdir(folder))
            except OSError as exep:
                # A missing/unreadable folder is reported and skipped, the
                # same best-effort policy that is applied to broken files.
                print('Something is wrong with the data stored in {}'.format(folder))
                print(exep)
                continue
            for file_name in file_names:
                if not file_name.endswith('.csv'):
                    continue
                path_processed_data = os.path.join(folder, file_name)
                try:
                    df_tmp = pd.read_csv(path_processed_data)
                    # same UTC-aware dtype as the 'Date' column of self.frame
                    df_tmp['Date'] = pd.to_datetime(df_tmp['Date'], utc=True)
                    self.frame = self.frame.merge(df_tmp, how='left', on='Date')
                except Exception as exep:  # pylint: disable=broad-except
                    # keep going: one bad file must not discard the others
                    print('Something is wrong with the data stored in {}'.format(path_processed_data))
                    print(exep)
        if save_data:
            path = os.path.join('.', 'res', 'all_raw.csv')
            self.frame.to_csv(path)
        return self.frame

    def get_all_paths(self):
        '''Returns all file paths where we read data from '''
        return self.path_to_processed
def get_date_frame(start, end):
    """Build a one-column frame of consecutive days beginning at start.

    One row is generated per whole day contained in ``end - start``, so
    ``start`` is included and ``end`` itself is not.

    @param start : first date of the range; must support ``end - start``
                   and ``start + timedelta``
    @param end : date at which the range stops (exclusive)
    @return : DataFrame with a single UTC-aware datetime column 'Date';
              it has no rows when end is less than one day after start
    """
    day_count = (end - start).days
    date_list = [start + dt.timedelta(days=offset) for offset in range(day_count)]
    # Name the column at construction time: building the frame from a bare
    # list and assigning ``columns`` afterwards raises ValueError for an
    # empty list, because that frame has no column to rename.
    frame = pd.DataFrame({'Date': date_list})
    frame['Date'] = pd.to_datetime(frame['Date'], utc=True)
    return frame
def test():
    '''Smoke run: build a DataMerger and merge everything with the default settings.'''
    DataMerger().get_all_data()
# Only run the merge when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()