From bcc8a7ece7a2edc0f2c86208dd798874a38a6c6b Mon Sep 17 00:00:00 2001
From: "guorong.zheng" <360996299@qq.com>
Date: Tue, 24 Sep 2024 11:31:26 +0800
Subject: [PATCH] chore:result cache

---
 main.py          | 10 +++----
 utils/channel.py | 70 +++++++++++++++-------------------
 2 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/main.py b/main.py
index 88bcf783c7..b8efed4b7e 100644
--- a/main.py
+++ b/main.py
@@ -7,7 +7,7 @@
     write_channel_to_file,
     setup_logging,
     cleanup_logging,
-    get_channel_data_with_cache_compare,
+    get_channel_data_cache_with_compare,
 )
 from utils.tools import (
     update_file,
@@ -155,7 +155,7 @@ async def main(self):
             self.subscribe_result,
             self.online_search_result,
         )
-        channel_data_with_cache = self.channel_data
+        channel_data_cache = self.channel_data
         self.total = self.get_urls_len(filter=True)
         sort_callback = lambda: self.pbar_update(name="测速")
         open_sort = config.getboolean("Settings", "open_sort")
@@ -190,11 +190,11 @@ async def main(self):
             shutil.copy(user_final_file, result_file)
         if config.getboolean("Settings", "open_use_old_result"):
             if open_sort:
-                channel_data_with_cache = get_channel_data_with_cache_compare(
-                    channel_data_with_cache, self.channel_data
+                channel_data_cache = get_channel_data_cache_with_compare(
+                    channel_data_cache, self.channel_data
                 )
             with open(resource_path("output/result_cache.pkl"), "wb") as file:
-                pickle.dump(channel_data_with_cache, file)
+                pickle.dump(channel_data_cache, file)
         if open_sort:
             user_log_file = "output/" + (
                 "user_result.log"
diff --git a/utils/channel.py b/utils/channel.py
index 3521c2a585..2a3f896ef6 100644
--- a/utils/channel.py
+++ b/utils/channel.py
@@ -50,29 +50,28 @@ def cleanup_logging():
         os.remove(log_path)
 
 
-def get_channel_data_from_file(channels=None, file=None, use_old=False):
+def get_channel_data_from_file(channels, file, use_old):
     """
     Get the channel data from the file
     """
     current_category = ""
-    pattern = r"^(.*?)(,(?!#genre#)(.*?))?$"
+    pattern = re.compile(r"^(.*?)(,(?!#genre#)(.*?))?$")
 
     for line in file:
         line = line.strip()
         if "#genre#" in line:
-            # This is a new channel, create a new key in the dictionary.
             current_category = line.split(",")[0]
         else:
-            # This is a url, add it to the list of urls for the current channel.
-            match = re.search(pattern, line)
+            match = pattern.search(line)
             if match is not None and match.group(1):
                 name = match.group(1).strip()
-                if name not in channels[current_category]:
-                    channels[current_category][name] = []
+                category_dict = channels[current_category]
+                if name not in category_dict:
+                    category_dict[name] = []
                 if use_old and match.group(3):
-                    url = match.group(3).strip()
-                    if url and url not in channels[current_category][name]:
-                        channels[current_category][name].append(url)
+                    info = (match.group(3).strip(), None, None)
+                    if info[0] and info not in category_dict[name]:
+                        category_dict[name].append(info)
     return channels
 
 
@@ -86,23 +85,20 @@ def get_channel_items():
 
     if os.path.exists(resource_path(user_source_file)):
         with open(resource_path(user_source_file), "r", encoding="utf-8") as file:
-            channels = get_channel_data_from_file(
-                channels=channels, file=file, use_old=open_use_old_result
-            )
-
-    if open_use_old_result and os.path.exists(resource_path("output/result_cache.pkl")):
-        with open(resource_path("output/result_cache.pkl"), "rb") as file:
-            old_result = pickle.load(file)
-            for cate, data in channels.items():
-                if cate in old_result:
-                    for name, urls in data.items():
-                        if name in old_result[cate]:
-                            old_urls = [
-                                url
-                                for info in old_result[cate][name]
-                                for url, _, _ in info
-                            ]
-                            channels[cate][name] = set(urls + old_urls)
+            channels = get_channel_data_from_file(channels, file, open_use_old_result)
+
+    if open_use_old_result:
+        result_cache_path = resource_path("output/result_cache.pkl")
+        if os.path.exists(result_cache_path):
+            with open(resource_path("output/result_cache.pkl"), "rb") as file:
+                old_result = pickle.load(file)
+                for cate, data in channels.items():
+                    if cate in old_result:
+                        for name, info_list in data.items():
+                            if name in old_result[cate]:
+                                for info in old_result[cate][name]:
+                                    if info not in info_list:
+                                        channels[cate][name].append(info)
 
     return channels
 
@@ -520,7 +516,7 @@ def append_all_method_data(
     Append all method data to total info data
     """
     for cate, channel_obj in items:
-        for name, old_urls in channel_obj.items():
+        for name, old_info_list in channel_obj.items():
             for method, result in [
                 ("hotel_fofa", hotel_fofa_result),
                 ("multicast", multicast_result),
@@ -553,9 +549,9 @@ def append_all_method_data(
                     data,
                     cate,
                     name,
-                    [(url, None, None) for url in old_urls],
+                    old_info_list,
                 )
-                print(name, "using old num:", len(old_urls))
+                print(name, "using old num:", len(old_info_list))
             print(
                 name,
                 "total num:",
@@ -593,14 +589,14 @@ def append_all_method_data_keep_all(
                 data = append_data_to_info_data(data, cate, name, urls)
                 print(name, f"{method.capitalize()} num:", len(urls))
 
         if config.getboolean("Settings", "open_use_old_result"):
-            old_urls = channel_obj.get(name, [])
+            old_info_list = channel_obj.get(name, [])
             data = append_data_to_info_data(
                 data,
                 cate,
                 name,
-                [(url, None, None) for url in old_urls],
+                old_info_list,
             )
-            print(name, "using old num:", len(old_urls))
+            print(name, "using old num:", len(old_info_list))
 
     return data
@@ -740,7 +736,7 @@ def get_multicast_fofa_search_urls():
     return search_urls
 
 
-def get_channel_data_with_cache_compare(data, new_data):
+def get_channel_data_cache_with_compare(data, new_data):
     """
     Get channel data with cache compare new data
     """
@@ -754,10 +750,6 @@ def match_url(url, sort_urls):
         if url_info and cate in data and name in data[cate]:
             new_urls = {new_url for new_url, _, _ in url_info}
             data[cate][name] = [
-                url
-                for info in data[cate][name]
-                for url, _, _ in info
-                if match_url(url, new_urls)
+                info for info in data[cate][name] if match_url(info[0], new_urls)
             ]
-
     return data
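
Reviewer note: beyond the rename, the common thread of the touched call sites is that cached channel entries are now info tuples end to end instead of bare URL strings. Below is a minimal, self-contained sketch of the resulting result_cache.pkl round trip. It assumes (the diff implies but never names this) that an entry is a 3-tuple whose first field is the URL; the patch only ever constructs (url, None, None), so the other two fields are placeholders here.

import pickle

# Hypothetical miniature of the cache round trip after this patch.
# Assumption: an entry is a 3-tuple (url, None, None); field meanings
# beyond the URL are not specified anywhere in the diff.
channels = {"CCTV": {"CCTV-1": [("http://example.com/a.m3u8", None, None)]}}

# main.py now dumps the merged dict as-is:
with open("result_cache.pkl", "wb") as f:
    pickle.dump(channels, f)

# get_channel_items() reloads it and appends only unseen tuples, so an
# identical (url, None, None) entry is not duplicated on re-merge:
with open("result_cache.pkl", "rb") as f:
    old_result = pickle.load(f)
for cate, data in channels.items():
    for name, info_list in data.items():
        for info in old_result.get(cate, {}).get(name, []):
            if info not in info_list:
                info_list.append(info)

print(channels)  # unchanged: the cached tuple was already present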
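The final hunk also appears to fix a latent bug rather than just rename: the old comprehension unpacked each cached entry as if it were a nested list of tuples (for url, _, _ in info over a single 3-tuple would fail at runtime), whereas the new one-level comprehension matches the (url, None, None) entries the rest of the patch constructs. A standalone sketch of the corrected pruning follows; the outer loop over new_data is reconstructed from the visible context lines, and the file's match_url helper (only its signature is visible in the hunk header) is simplified here to exact set membership, which the real helper may relax.

# Standalone sketch of get_channel_data_cache_with_compare after this patch.
# Assumptions: loop structure inferred from context lines; match_url reduced
# to exact membership in the set of freshly tested URLs.
def get_channel_data_cache_with_compare(data, new_data):
    for cate, obj in new_data.items():
        for name, url_info in obj.items():
            if url_info and cate in data and name in data[cate]:
                new_urls = {new_url for new_url, _, _ in url_info}
                # Keep only cached tuples whose URL survived the new run.
                data[cate][name] = [
                    info for info in data[cate][name] if info[0] in new_urls
                ]
    return data

cache = {"CCTV": {"CCTV-1": [("http://a/1", None, None), ("http://b/1", None, None)]}}
fresh = {"CCTV": {"CCTV-1": [("http://a/1", None, None)]}}
print(get_channel_data_cache_with_compare(cache, fresh))
# -> {'CCTV': {'CCTV-1': [('http://a/1', None, None)]}}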