-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
283 lines (229 loc) · 8.4 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
import logging
import os
import re
import numpy as np
import pandas as pd
import pynvml
import torch
from constant import Constants
from spider.UserSubmissionSpider import sanitize_filename
logger = logging.getLogger()
def save_model(model, save_dir, train_epoch_info, test_epoch_info, arg):
    """Save the model's weights under a folder named after its configuration.

    The checkpoint is written to
    ``<save_dir>/<model class name>/<get_config_string(arg)>/`` and the file
    name embeds the training epoch plus the train/test loss, MAE and accuracy
    (each formatted with four decimals).

    Args:
        model: Object exposing ``state_dict()``; its class name becomes a
            path component.
        save_dir: Root directory for checkpoints (normalised with
            ``os.path.normpath``).
        train_epoch_info: Mapping providing ``'epoch'``, ``'loss'``,
            ``'mae'`` and ``'acc'``.
        test_epoch_info: Mapping providing ``'loss'``, ``'mae'`` and
            ``'acc'``.
        arg: Configuration mapping passed to ``get_config_string``.
    """
    target_dir = os.path.join(
        os.path.normpath(save_dir),
        type(model).__name__,
        get_config_string(arg),
    )
    # Create the destination folder (and any missing parents).
    os.makedirs(target_dir, exist_ok=True)

    name_parts = [
        f"model_train_epoch_{train_epoch_info['epoch']}",
        f"train_loss_{train_epoch_info['loss']:.4f}",
        f"train_mae_{train_epoch_info['mae']:.4f}",
        f"train_acc_{train_epoch_info['acc']:.4f}",
        f"test_loss_{test_epoch_info['loss']:.4f}",
        f"test_mae_{test_epoch_info['mae']:.4f}",
        f"test_acc_{test_epoch_info['acc']:.4f}",
    ]
    checkpoint_path = os.path.join(target_dir, "_".join(name_parts) + ".pth")

    # Move every entry of the state dict to the CPU before serialising.
    cpu_weights = {}
    for name, tensor in model.state_dict().items():
        cpu_weights[name] = tensor.cpu()

    torch.save(cpu_weights, checkpoint_path)
    logger.info(f"模型已保存到: {checkpoint_path}")
def load_latest_model(model, save_dir, arg):
    """Load the highest-epoch checkpoint for this model/configuration.

    Checkpoints are looked up in
    ``<save_dir>/<model class name>/<get_config_string(arg)>/``; only regular
    files whose names match ``model_*_epoch_<N>_*.pth`` are considered.

    Args:
        model: Object exposing ``load_state_dict``; its class name is a
            path component.
        save_dir: Root directory for checkpoints.
        arg: Configuration mapping passed to ``get_config_string``.

    Returns:
        The epoch number parsed from the loaded file name, or ``0`` when the
        folder does not exist or holds no matching checkpoint.
    """
    checkpoint_dir = os.path.join(
        os.path.normpath(save_dir),
        type(model).__name__,
        get_config_string(arg),
    )
    device = torch.device(f'cuda:{Constants.CUDA}' if torch.cuda.is_available() else 'cpu')

    if not os.path.exists(checkpoint_dir):
        logger.info(f"模型文件夹 {checkpoint_dir} 不存在。")
        return 0

    epoch_re = re.compile(r'model_.*_epoch_(\d+)_.*\.pth')

    def epoch_of(name):
        # Epoch number embedded in a checkpoint file name.
        return int(epoch_re.search(name).group(1))

    candidates = []
    for entry in os.listdir(checkpoint_dir):
        if os.path.isfile(os.path.join(checkpoint_dir, entry)) and epoch_re.match(entry):
            candidates.append(entry)

    if not candidates:
        logger.info(f"没有找到模型文件在 {checkpoint_dir} 中。")
        return 0

    # max() keeps the first entry among equal epochs, matching a stable
    # descending sort followed by taking element 0.
    newest = max(candidates, key=epoch_of)
    newest_path = os.path.join(checkpoint_dir, newest)
    logger.info(f"加载最新模型: {newest_path}")
    latest_epoch = epoch_of(newest)

    # Map the stored tensors onto the selected device while loading.
    model.load_state_dict(torch.load(newest_path, map_location=device))
    return latest_epoch
def get_config_string(arg):
    """Render a configuration mapping as a string usable in a folder name.

    Items are taken in sorted order and each is written as ``key-value``;
    the pieces are joined with underscores. An empty mapping yields ``""``.
    """
    return "_".join(f"{name}-{setting}" for name, setting in sorted(arg.items()))
def load_embedding(filename):
    """Read a text embedding file that starts with a ``<count> <dim>`` header.

    Every following non-blank line is split on single spaces: the first
    field is the word and the remaining fields are parsed as floats.

    Args:
        filename: Path of the UTF-8 encoded embedding file.

    Returns:
        A tuple ``(wcnt, emb_size, words, embs)`` where ``wcnt`` and
        ``emb_size`` are the two integers from the header line (returned as
        read, not checked against the data), ``words`` is the list of words
        in file order and ``embs`` is ``np.asarray`` of the per-word vectors.

    Raises:
        StopIteration: If the file is empty (there is no header line).
        ValueError: If the header does not consist of exactly two
            space-separated integers, or a vector field is not a float.
    """
    words = []
    vectors = []
    # The context manager guarantees the handle is closed, also on errors.
    with open(filename, encoding='utf-8') as f:
        wcnt, emb_size = next(f).strip().split(' ')
        wcnt = int(wcnt)
        emb_size = int(emb_size)
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line would otherwise add an empty
                # word with a zero-length vector and make the array ragged.
                continue
            fields = stripped.split(' ')
            words.append(fields[0])
            vectors.append(np.array([float(x) for x in fields[1:]]))
    embs = np.asarray(vectors)
    return wcnt, emb_size, words, embs
def load_glove_embedding(filename):
    """Read a header-less, GloVe-style text embedding file in one pass.

    Each non-blank line is split on single spaces: the first field is the
    word and the remaining fields are parsed as floats.

    Args:
        filename: Path of the UTF-8 encoded embedding file.

    Returns:
        A tuple ``(wcnt, emb_size, words, embs)`` where ``wcnt`` is the number
        of words read, ``emb_size`` is the number of vector fields on the
        first data line (``0`` when the file has no data lines), ``words`` is
        the list of words in file order and ``embs`` is ``np.asarray`` of the
        per-word vectors.

    Raises:
        ValueError: If a vector field cannot be parsed as a float.
    """
    words = []
    vectors = []
    emb_size = 0
    with open(filename, encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no vector; keeping them would produce a
                # ragged array.
                continue
            fields = stripped.split(' ')
            if not words:
                # The first data line fixes the dimension (fields minus the
                # word itself), so no separate pass over the file is needed.
                emb_size = len(fields) - 1
            words.append(fields[0])
            vectors.append(np.array([float(x) for x in fields[1:]]))
    # Convert to a numpy array and derive the vocabulary size.
    embs = np.asarray(vectors)
    wcnt = len(words)
    return wcnt, emb_size, words, embs
import shutil
import os
def delete_folder(folder_path):
    """Remove a directory and everything in it, reporting the result on stdout.

    A path that does not exist or is not a directory only produces a
    message. Any exception raised along the way is caught and printed
    instead of propagating. Returns ``None`` in every case.
    """
    try:
        # Bail out unless the path exists and is a directory.
        if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
            print(f"'{folder_path}' 不是一个有效的文件夹路径。")
            return
        # Remove the directory together with all of its contents.
        shutil.rmtree(folder_path)
        print(f"文件夹 '{folder_path}' 已成功删除。")
    except Exception as err:
        print(f"删除文件夹时发生错误: {err}")
def extract_file_paths(error_file_path):
    """Collect the second `` : ``-separated field from each line of a file.

    Each line is stripped and split on the three-character separator
    ``' : '``; lines that do not contain the separator are ignored.

    Args:
        error_file_path: Path of the UTF-8 encoded file to scan.

    Returns:
        List of the extracted fields, in file order.
    """
    with open(error_file_path, 'r', encoding='utf-8') as report:
        split_rows = (row.strip().split(' : ') for row in report)
        return [pieces[1] for pieces in split_rows if len(pieces) > 1]
def delete_files(base_directory, file_paths):
    """Delete the listed files beneath ``base_directory``, printing each outcome.

    Every entry of ``file_paths`` is joined onto ``base_directory``. Missing
    files are reported and skipped; an exception while handling one file is
    printed and does not stop the remaining files from being processed.
    """
    for relative_path in file_paths:
        target = os.path.join(base_directory, relative_path)
        try:
            if not os.path.exists(target):
                print(f"文件不存在: {target}")
                continue
            os.remove(target)
            print(f"已删除文件: {target}")
        except Exception as err:
            print(f"删除文件 {target} 时发生错误: {err}")
def get_folder_names(directory):
    """Return the set of names of the immediate sub-directories of ``directory``."""
    return {
        entry
        for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    }
def extract_users_from_ratings(ratings_directory):
    """Gather the distinct ``'User Name'`` values from all ``.xlsx`` files.

    Every directory entry whose name ends with ``.xlsx`` is read with
    ``pd.read_excel``; the non-null values of its ``'User Name'`` column are
    added to the result.

    Args:
        ratings_directory: Directory containing the workbooks.

    Returns:
        Set of user names found across all workbooks (empty when the
        directory has no ``.xlsx`` entries).
    """
    users = set()
    workbook_names = [
        name for name in os.listdir(ratings_directory) if name.endswith(".xlsx")
    ]
    for name in workbook_names:
        sheet = pd.read_excel(os.path.join(ratings_directory, name))
        users.update(sheet['User Name'].dropna())
    return users
def reverse_sanitize_filename(filename):
    """Replace a trailing ``'_dot'`` with ``'.'``; other names pass through.

    NOTE(review): presumably the inverse of ``sanitize_filename`` for names
    that ended with a dot - confirm against that helper.
    """
    marker = '_dot'
    if not filename.endswith(marker):
        return filename
    return filename[:-len(marker)] + '.'
import torch
import os
import glob
def convert_model_to_cpu(model_path):
    """Rewrite a saved state dict so that every entry lives on the CPU.

    The file is loaded with ``map_location='cpu'`` and each value is passed
    through ``.cpu()``. If ``<model_path>.backup`` does not exist yet, the
    original file is renamed to it before the CPU version is saved at
    ``model_path``; an existing backup is left untouched.

    Args:
        model_path: Path of the checkpoint file to convert in place.

    Returns:
        ``True`` on success, ``False`` if any step raised (the error is
        printed rather than propagated).
    """
    try:
        # Load the stored state onto the CPU.
        loaded = torch.load(model_path, map_location='cpu')
        # Make sure every entry really is a CPU tensor.
        cpu_weights = {}
        for name, tensor in loaded.items():
            cpu_weights[name] = tensor.cpu()

        # Keep the original as a backup, but never overwrite an older backup.
        backup_path = model_path + '.backup'
        backup_present = os.path.exists(backup_path)
        if not backup_present:
            os.rename(model_path, backup_path)

        # Write the CPU version under the original name.
        torch.save(cpu_weights, model_path)
        print(f"成功转换模型: {model_path}")
    except Exception as e:
        print(f"转换模型时出错 {model_path}: {str(e)}")
        return False
    else:
        return True
def batch_convert_models(base_dir):
    """Convert every ``.pth`` file below ``base_dir`` with ``convert_model_to_cpu``.

    Files are discovered with a recursive glob. Each path is announced
    before conversion, and a summary of successes, failures and the total
    is printed at the end.
    """
    # Find all .pth files, at any depth.
    search_pattern = os.path.join(base_dir, "**/*.pth")
    checkpoints = glob.glob(search_pattern, recursive=True)

    outcomes = []
    for checkpoint in checkpoints:
        print(f"处理模型: {checkpoint}")
        outcomes.append(bool(convert_model_to_cpu(checkpoint)))

    converted = sum(outcomes)
    failed = len(outcomes) - converted

    print("\n转换完成:")
    print(f"成功: {converted}")
    print(f"失败: {failed}")
    print(f"总计: {len(checkpoints)}")
def verify_model_device(model_path):
    """Check whether every tensor in a saved state dict is on the CPU.

    The file is loaded without ``map_location``. A warning naming the key
    and its device is printed for every entry whose device type is not
    ``'cpu'``.

    Args:
        model_path: Path of the checkpoint file to inspect.

    Returns:
        ``True`` if no entry was found off the CPU, otherwise ``False``.
    """
    checkpoint = torch.load(model_path)
    off_cpu_count = 0
    for name, tensor in checkpoint.items():
        if tensor.device.type == 'cpu':
            continue
        print(f"警告: {name} 在 {tensor.device}")
        off_cpu_count += 1
    return off_cpu_count == 0
def check_memory(description=""):
    """Print the CUDA memory currently allocated and reserved, in MB.

    Runs ``gc.collect()`` and ``torch.cuda.empty_cache()`` first, then reads
    ``torch.cuda.memory_allocated()`` and ``torch.cuda.memory_reserved()``.

    Args:
        description: Label appended to the header line of the report.
    """
    import gc
    import torch

    # Collect garbage and drop cached blocks before taking the readings.
    gc.collect()
    torch.cuda.empty_cache()

    bytes_per_mb = 1024 * 1024
    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb

    print(f"\n=== Memory Status {description} ===")
    print(f"Allocated: {allocated_mb:.2f} MB")
    print(f"Reserved: {reserved_mb:.2f} MB")
# import pandas as pd
#
# # Load the first Excel file
# df1 = pd.read_excel(r'D:\MyKT\data\new.xlsx')
#
# # Load the second Excel file
# df2 = pd.read_excel(r'D:\MyKT\data\codeforces_problem_detail.xlsx')
#
# # Merge the two dataframes based on 'cid' and 'qindex'
# merged_df = pd.merge(df1, df2, on=['cid', 'qindex'], how='left')
#
# # Add the 'content' column from df2 to df1
# df1['content'] = merged_df['content']
#
# # Save the updated dataframe back to the first Excel file
# df1.to_excel(r'D:\MyKT\data\new.xlsx', index=False)