-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcsv_handler.py
138 lines (116 loc) · 4.09 KB
/
csv_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
CSV handling functionality for Animagine Prompt Node
"""
import pandas as pd
from typing import Optional, Dict
import os
import time
from .logger import logger
class CSVHandler:
    """
    Handles loading and processing of CSV files containing character data.

    Frames are cached per file path in ``self.cache``. The frame most
    recently returned by ``load_csv`` is kept in ``self.current_data`` and
    is the one ``get_entry`` reads from.
    """

    # Columns every character CSV must provide.
    REQUIRED_COLUMNS = ['GENDER', 'CHARACTER', 'COPYRIGHT']

    def __init__(self):
        # Frame that get_entry() reads from; None until a load succeeds.
        self.current_data = None
        # Maps filepath -> DataFrame; filled by load_csv().
        self.cache = {}

    def _read_validated(self, filepath: str) -> Optional[pd.DataFrame]:
        """
        Reads the CSV once and returns it only if it passes validation.

        Args:
            filepath: Path of the CSV file to read.

        Returns:
            The parsed frame when the file exists and contains every column
            in ``REQUIRED_COLUMNS``; otherwise None. Every failure (missing
            file, missing columns, read error) is logged under the
            "CSV Validation" context before None is returned.
        """
        try:
            if not os.path.exists(filepath):
                logger.log_error(
                    FileNotFoundError(f"CSV file not found: {filepath}"),
                    "CSV Validation",
                    {"filepath": filepath}
                )
                return None

            df = pd.read_csv(filepath)
            logger.log_csv_operation(
                "Validation",
                filepath,
                {"columns_found": df.columns.tolist()}
            )

            missing_columns = [
                col for col in self.REQUIRED_COLUMNS if col not in df.columns
            ]
            if missing_columns:
                logger.log_error(
                    ValueError("Missing required columns"),
                    "CSV Validation",
                    {"missing_columns": missing_columns}
                )
                return None

            return df
        except Exception as e:
            logger.log_error(e, "CSV Validation", {"filepath": filepath})
            return None

    def validate_csv_structure(self, filepath: str) -> bool:
        """
        Validates if the CSV file has the required structure.

        Args:
            filepath: Path of the CSV file to check.

        Returns:
            True when the file exists, can be parsed and contains every
            column in ``REQUIRED_COLUMNS``; False otherwise. Failures are
            logged, never raised.
        """
        return self._read_validated(filepath) is not None

    def load_csv(self, filepath: str) -> Optional[pd.DataFrame]:
        """
        Loads CSV file with basic caching.

        Args:
            filepath: Path of the CSV file to load; also the cache key.

        Returns:
            The loaded (or previously cached) frame, or None when the file
            fails validation or loading raises. In both success cases the
            returned frame also becomes ``self.current_data``.
        """
        try:
            start_time = time.time()

            if filepath in self.cache:
                cached = self.cache[filepath]
                # Keep get_entry() in sync with the file the caller just
                # asked for; without this a cache hit would leave
                # current_data pointing at whichever file was loaded last.
                self.current_data = cached
                logger.log_csv_operation(
                    "Cache Hit",
                    filepath,
                    {"cache_size": len(self.cache)}
                )
                return cached

            # Single read: the validated frame is the one that gets cached,
            # so the file is not parsed twice and cannot change in between.
            df = self._read_validated(filepath)
            if df is None:
                return None

            self.cache[filepath] = df
            self.current_data = df

            end_time = time.time()
            logger.log_performance(
                "CSV Load",
                start_time,
                end_time,
                {
                    "filepath": filepath,
                    "rows": len(df),
                    "cached": True
                }
            )
            return df
        except Exception as e:
            logger.log_error(e, "CSV Load", {"filepath": filepath})
            return None

    def get_entry(self, index: int) -> Optional[Dict[str, str]]:
        """
        Gets a specific entry from the loaded CSV.

        Args:
            index: Positional row index into ``self.current_data``. Negative
                values are passed to ``iloc`` unchanged.

        Returns:
            A dict with the keys 'gender', 'character' and 'copyright' taken
            from the row's GENDER, CHARACTER and COPYRIGHT columns, or None
            when no CSV is loaded, the index is past the last row, or the
            lookup raises. Failures are logged, never raised.
        """
        try:
            if self.current_data is None:
                logger.log_error(
                    ValueError("No CSV data loaded"),
                    "Get Entry",
                    {"index": index}
                )
                return None

            if index >= len(self.current_data):
                logger.log_error(
                    IndexError(f"Index {index} out of bounds"),
                    "Get Entry",
                    {
                        "index": index,
                        "max_index": len(self.current_data) - 1
                    }
                )
                return None

            row = self.current_data.iloc[index]
            entry = {
                'gender': row['GENDER'],
                'character': row['CHARACTER'],
                'copyright': row['COPYRIGHT']
            }
            logger.log_csv_operation(
                "Get Entry",
                None,
                {"index": index, "entry": entry}
            )
            return entry
        except Exception as e:
            logger.log_error(e, "Get Entry", {"index": index})
            return None