From bcc8a7ece7a2edc0f2c86208dd798874a38a6c6b Mon Sep 17 00:00:00 2001
From: "guorong.zheng" <360996299@qq.com>
Date: Tue, 24 Sep 2024 11:31:26 +0800
Subject: [PATCH] chore:result cache

---
 main.py          | 10 +++----
 utils/channel.py | 70 +++++++++++++++-------------------
 2 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/main.py b/main.py
index 88bcf783c7..b8efed4b7e 100644
--- a/main.py
+++ b/main.py
@@ -7,7 +7,7 @@
     write_channel_to_file,
     setup_logging,
     cleanup_logging,
-    get_channel_data_with_cache_compare,
+    get_channel_data_cache_with_compare,
 )
 from utils.tools import (
     update_file,
@@ -155,7 +155,7 @@ async def main(self):
             self.subscribe_result,
             self.online_search_result,
         )
-        channel_data_with_cache = self.channel_data
+        channel_data_cache = self.channel_data
         self.total = self.get_urls_len(filter=True)
         sort_callback = lambda: self.pbar_update(name="测速")
         open_sort = config.getboolean("Settings", "open_sort")
@@ -190,11 +190,11 @@ async def main(self):
             shutil.copy(user_final_file, result_file)
         if config.getboolean("Settings", "open_use_old_result"):
             if open_sort:
-                channel_data_with_cache = get_channel_data_with_cache_compare(
-                    channel_data_with_cache, self.channel_data
+                channel_data_cache = get_channel_data_cache_with_compare(
+                    channel_data_cache, self.channel_data
                 )
             with open(resource_path("output/result_cache.pkl"), "wb") as file:
-                pickle.dump(channel_data_with_cache, file)
+                pickle.dump(channel_data_cache, file)
         if open_sort:
             user_log_file = "output/" + (
                 "user_result.log"
diff --git a/utils/channel.py b/utils/channel.py
index 3521c2a585..2a3f896ef6 100644
--- a/utils/channel.py
+++ b/utils/channel.py
@@ -50,29 +50,28 @@ def cleanup_logging():
         os.remove(log_path)
 
 
-def get_channel_data_from_file(channels=None, file=None, use_old=False):
+def get_channel_data_from_file(channels, file, use_old):
     """
     Get the channel data from the file
     """
     current_category = ""
-    pattern = r"^(.*?)(,(?!#genre#)(.*?))?$"
+    pattern = re.compile(r"^(.*?)(,(?!#genre#)(.*?))?$")
 
     for line in file:
         line = line.strip()
         if "#genre#" in line:
-            # This is a new channel, create a new key in the dictionary.
             current_category = line.split(",")[0]
         else:
-            # This is a url, add it to the list of urls for the current channel.
-            match = re.search(pattern, line)
+            match = pattern.search(line)
             if match is not None and match.group(1):
                 name = match.group(1).strip()
-                if name not in channels[current_category]:
-                    channels[current_category][name] = []
+                category_dict = channels[current_category]
+                if name not in category_dict:
+                    category_dict[name] = []
                 if use_old and match.group(3):
-                    url = match.group(3).strip()
-                    if url and url not in channels[current_category][name]:
-                        channels[current_category][name].append(url)
+                    info = (match.group(3).strip(), None, None)
+                    if info[0] and info not in category_dict[name]:
+                        category_dict[name].append(info)
     return channels
 
 
@@ -86,23 +85,20 @@ def get_channel_items():
 
     if os.path.exists(resource_path(user_source_file)):
         with open(resource_path(user_source_file), "r", encoding="utf-8") as file:
-            channels = get_channel_data_from_file(
-                channels=channels, file=file, use_old=open_use_old_result
-            )
-
-    if open_use_old_result and os.path.exists(resource_path("output/result_cache.pkl")):
-        with open(resource_path("output/result_cache.pkl"), "rb") as file:
-            old_result = pickle.load(file)
-            for cate, data in channels.items():
-                if cate in old_result:
-                    for name, urls in data.items():
-                        if name in old_result[cate]:
-                            old_urls = [
-                                url
-                                for info in old_result[cate][name]
-                                for url, _, _ in info
-                            ]
-                            channels[cate][name] = set(urls + old_urls)
+            channels = get_channel_data_from_file(channels, file, open_use_old_result)
+
+    if open_use_old_result:
+        result_cache_path = resource_path("output/result_cache.pkl")
+        if os.path.exists(result_cache_path):
+            with open(resource_path("output/result_cache.pkl"), "rb") as file:
+                old_result = pickle.load(file)
+                for cate, data in channels.items():
+                    if cate in old_result:
+                        for name, info_list in data.items():
+                            if name in old_result[cate]:
+                                for info in old_result[cate][name]:
+                                    if info not in info_list:
+                                        channels[cate][name].append(info)
 
     return channels
 
@@ -520,7 +516,7 @@ def append_all_method_data(
     Append all method data to total info data
     """
     for cate, channel_obj in items:
-        for name, old_urls in channel_obj.items():
+        for name, old_info_list in channel_obj.items():
             for method, result in [
                 ("hotel_fofa", hotel_fofa_result),
                 ("multicast", multicast_result),
@@ -553,9 +549,9 @@ def append_all_method_data(
                     data,
                     cate,
                     name,
-                    [(url, None, None) for url in old_urls],
+                    old_info_list,
                 )
-                print(name, "using old num:", len(old_urls))
+                print(name, "using old num:", len(old_info_list))
             print(
                 name,
                 "total num:",
@@ -593,14 +589,14 @@ def append_all_method_data_keep_all(
                 data = append_data_to_info_data(data, cate, name, urls)
                 print(name, f"{method.capitalize()} num:", len(urls))
 
         if config.getboolean("Settings", "open_use_old_result"):
-            old_urls = channel_obj.get(name, [])
+            old_info_list = channel_obj.get(name, [])
             data = append_data_to_info_data(
                 data,
                 cate,
                 name,
-                [(url, None, None) for url in old_urls],
+                old_info_list,
             )
-            print(name, "using old num:", len(old_urls))
+            print(name, "using old num:", len(old_info_list))
 
     return data
@@ -740,7 +736,7 @@ def get_multicast_fofa_search_urls():
     return search_urls
 
 
-def get_channel_data_with_cache_compare(data, new_data):
+def get_channel_data_cache_with_compare(data, new_data):
     """
     Get channel data with cache compare new data
     """
@@ -754,10 +750,6 @@ def match_url(url, sort_urls):
         if url_info and cate in data and name in data[cate]:
             new_urls = {new_url for new_url, _, _ in url_info}
             data[cate][name] = [
-                url
-                for info in data[cate][name]
-                for url, _, _ in info
-                if match_url(url, new_urls)
+                info for info in data[cate][name] if match_url(info[0], new_urls)
             ]
-
     return data
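
Reviewer note: beyond the rename, the common thread of the touched call sites is that cached channel entries are now info tuples end to end instead of bare URL strings. Below is a minimal, self-contained sketch of the resulting result_cache.pkl round trip. It assumes (the diff implies but never names this) that an entry is a 3-tuple whose first field is the URL; the patch only ever constructs (url, None, None), so the other two fields are placeholders here.

import pickle

# Hypothetical miniature of the cache round trip after this patch.
# Assumption: an entry is a 3-tuple (url, None, None); field meanings
# beyond the URL are not specified anywhere in the diff.
channels = {"CCTV": {"CCTV-1": [("http://example.com/a.m3u8", None, None)]}}

# main.py now dumps the merged dict as-is:
with open("result_cache.pkl", "wb") as f:
    pickle.dump(channels, f)

# get_channel_items() reloads it and appends only unseen tuples, so an
# identical (url, None, None) entry is not duplicated on re-merge:
with open("result_cache.pkl", "rb") as f:
    old_result = pickle.load(f)
for cate, data in channels.items():
    for name, info_list in data.items():
        for info in old_result.get(cate, {}).get(name, []):
            if info not in info_list:
                info_list.append(info)

print(channels)  # unchanged: the cached tuple was already present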
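The final hunk also appears to fix a latent bug rather than just rename: the old comprehension unpacked each cached entry as if it were a nested list of tuples (for url, _, _ in info over a single 3-tuple would fail at runtime), whereas the new one-level comprehension matches the (url, None, None) entries the rest of the patch constructs. A standalone sketch of the corrected pruning follows; the outer loop over new_data is reconstructed from the visible context lines, and the file's match_url helper (only its signature is visible in the hunk header) is simplified here to exact set membership, which the real helper may relax.

# Standalone sketch of get_channel_data_cache_with_compare after this patch.
# Assumptions: loop structure inferred from context lines; match_url reduced
# to exact membership in the set of freshly tested URLs.
def get_channel_data_cache_with_compare(data, new_data):
    for cate, obj in new_data.items():
        for name, url_info in obj.items():
            if url_info and cate in data and name in data[cate]:
                new_urls = {new_url for new_url, _, _ in url_info}
                # Keep only cached tuples whose URL survived the new run.
                data[cate][name] = [
                    info for info in data[cate][name] if info[0] in new_urls
                ]
    return data

cache = {"CCTV": {"CCTV-1": [("http://a/1", None, None), ("http://b/1", None, None)]}}
fresh = {"CCTV": {"CCTV-1": [("http://a/1", None, None)]}}
print(get_channel_data_cache_with_compare(cache, fresh))
# -> {'CCTV': {'CCTV-1': [('http://a/1', None, None)]}}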