-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathextractor.py
115 lines (90 loc) · 4.16 KB
/
extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Abstract Extractor class.
All concrete extractors must inherit from it and override every method marked with @abstractmethod.
"""
from abc import ABC, abstractmethod
import exceptions
class Extractor(ABC):
def __init__(self, bank_text: str):
self.bank_text = bank_text
@abstractmethod
def check_specific_signatures(self):
"""Function is not expected to return any result, but is expected to raise
exceptions.InputFileStructureError() if the text is not supported
"""
pass
@abstractmethod
def get_period_balance(self) -> float:
"""Function gets information about transaction balance from the header of the banlk extract
Returns:
float: balance of the period
"""
pass
@abstractmethod
def split_text_on_entries(self)->list[str]:
"""Function splits the text on entries (on individual transactions)
Returns:
list[str]: list of entries
"""
pass
@abstractmethod
def decompose_entry_to_dict(self, entry: str) -> dict | list[dict]:
"""Function decomposes entry into a dictionary or a list of dictionaries
Args:
entry (str): _description_
Returns:
list[dict]: resulting dictionary or list of dictionaries. The possibility to return a list of dictionaries
is added to support to support cases like this https://github.com/Ev2geny/Sberbank2Excel/issues/51
which are probably can be considered to be bugs
"""
pass
@abstractmethod
def get_column_name_for_balance_calculation(self) -> str:
"""Function returns the name of the column that is used for calculation of the balance of the period
Returns:
str: _description_
"""
pass
@abstractmethod
def get_columns_info(self)->dict:
"""Returns full column names in the order and in the form they shall appear in Excel
The keys in dictionary shall correspond to keys of the result of the function self.decompose_entry_to_dict()
"""
def check_support(self)->bool:
"""Function checks if the text is supported by the extractor
Returns:
bool: True if the text is supported, False otherwise
"""
try:
# this would raise an exception if the text is not supported
self.check_specific_signatures()
result = isinstance(self.get_period_balance(),float) and len(self.split_text_on_entries()) > 0
return result
except exceptions.InputFileStructureError:
return False
def get_entries(self) -> list[dict]:
"""Function returns list of dictionaries, where each dictionary corresponds to one entry
Raises:
e: if there was an error while processing one of the entries, the function with print the entry and raise the exception
Returns:
list[dict]: list of dictionaries, where each dictionary corresponds to one entry
"""
# entries_list_of_dicts = [self.decompose_entry_to_dict(entry) for entry in self.split_text_on_entries()]
entries_list_of_dicts = []
for entry in self.split_text_on_entries():
try:
result = self.decompose_entry_to_dict(entry)
if isinstance(result, dict):
entries_list_of_dicts.append(result)
elif isinstance(result, list):
entries_list_of_dicts.extend(result)
else:
raise RuntimeError("decompose_entry_to_dict() shall return either dict or list[dict]")
except Exception as e:
raise RuntimeError("Ошибка при обработке трансакции\n" +
"-"*20 +
"\n" +
entry +
"\n" +
"-"*20) from e
return entries_list_of_dicts