From 938c5a8280dd6a191bb1ff74f5cad421577c7f7b Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 05:24:41 +0800
Subject: [PATCH 1/9] improve filter

---
 manga_translator/manga_translator.py | 83 ++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 12 deletions(-)

diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py
index c9d02144b..0eb182a49 100644
--- a/manga_translator/manga_translator.py
+++ b/manga_translator/manga_translator.py
@@ -213,18 +213,6 @@ async def _translate(self, config: Config, ctx: Context) -> Context:
         # -- OCR
         await self._report_progress('ocr')
         ctx.textlines = await self._run_ocr(config, ctx)
-        
-        if config.translator.skip_lang is not None :
-            filtered_textlines = []
-            skip_langs = config.translator.skip_lang.split(',')
-            for txtln in ctx.textlines :
-                try :
-                    source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN')
-                except Exception :
-                    source_language = 'UNKNOWN'
-                if source_language not in skip_langs :
-                    filtered_textlines.append(txtln)
-            ctx.textlines = filtered_textlines
 
         if not ctx.textlines:
             await self._report_progress('skip-no-text', True)
@@ -338,8 +326,79 @@ async def _run_ocr(self, config: Config, ctx: Context):
     async def _run_textline_merge(self, config: Config, ctx: Context):
         text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0],
                                                      verbose=self.verbose)
+        # Filter out languages to skip  
+        if config.translator.skip_lang is not None:  
+            skip_langs = [lang.strip().upper() for lang in config.translator.skip_lang.split(',')]  
+            filtered_textlines = []  
+            for txtln in ctx.textlines:  
+                try:  
+                    detected_lang = langdetect.detect(txtln.text)  
+                    source_language = LANGDETECT_MAP.get(detected_lang.lower(), 'UNKNOWN').upper()  
+                except Exception:  
+                    source_language = 'UNKNOWN'  
+    
+                # Print detected source_language and whether it's in skip_langs  
+                # logger.info(f'Detected source language: {source_language}, in skip_langs: {source_language in skip_langs}, text: "{txtln.text}"')  
+    
+                if source_language in skip_langs:  
+                    logger.info(f'Filtered out: {txtln.text}')  
+                    logger.info(f'Reason: Detected language {source_language} is in skip_langs')  
+                    continue  # Skip this region  
+                filtered_textlines.append(txtln)  
+            ctx.textlines = filtered_textlines  
+    
+        text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0],  
+                                                     verbose=self.verbose)  
+
         new_text_regions = []
         for region in text_regions:
+
+            # Remove leading spaces and specified characters from each line (after pre-translation dictionary replacement)
+            original_text = region.text
+            stripped_text = original_text.lstrip('、？！')
+
+            # Record the removed leading characters
+            removed_start_chars = original_text[:len(original_text) - len(stripped_text)]
+            if removed_start_chars:
+                logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"')
+
+            # Filter condition modification: Handle incomplete brackets
+            # Combine left brackets and left quotation marks into a single list
+            left_symbols = ['(', '（', '[', '【', '{', '〔', '〈', '「',
+                            '“', '‘', '《', '『', '"', '〝', '﹁', '﹃',
+                            '⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«']
+
+            # Combine right brackets and right quotation marks into a single list
+            right_symbols = [')', '）', ']', '】', '}', '〕', '〉', '」',
+                             '”', '’', '》', '』', '"', '〞', '﹂', '﹄',
+                             '⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»']
+
+            # Combine all symbols
+            all_symbols = left_symbols + right_symbols
+
+            # Count the number of left and right symbols
+            left_count = sum(stripped_text.count(s) for s in left_symbols)
+            right_count = sum(stripped_text.count(s) for s in right_symbols)
+
+            # Check if the number of left and right symbols match
+            if left_count != right_count:
+                # Symbols are not paired, remove all symbols
+                for s in all_symbols:
+                    stripped_text = stripped_text.replace(s, '')
+                logger.info(f'Removed unpaired symbols from "{stripped_text}"')
+
+            # Check if the text ends with an Arabic numeral, "、", or "？"
+            stripped_text = stripped_text.rstrip()
+            end_char = stripped_text[-1] if stripped_text else ''
+
+            # If the end is a specified character, remove it instead of skipping the whole sentence
+            if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '０', '１', '２', '３', '４', '５', '６', '７', '８', '９', '、']:
+                stripped_text = stripped_text[:-1]  # Remove the last character
+                logger.info(f'Removed last character: {end_char} from "{stripped_text}"')
+
+            # Update region.text
+            region.text = stripped_text.strip()            
+
             if len(region.text) >= config.ocr.min_text_length \
                     and not is_valuable_text(region.text) \
                     or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0):

From 770474403e33cabfbdb9d9baeda8c8c15d182f13 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 05:25:47 +0800
Subject: [PATCH 2/9] several bug fix

---
 manga_translator/translators/chatgpt.py | 277 +++++++++++++-----------
 1 file changed, 156 insertions(+), 121 deletions(-)

diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
index 7763e0b41..db31fc09c 100644
--- a/manga_translator/translators/chatgpt.py
+++ b/manga_translator/translators/chatgpt.py
@@ -127,68 +127,97 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]):
     def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
         return prompt
 
-    async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
-        translations = []
-        self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
-
-        for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries):
-            self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt))
-
-            ratelimit_attempt = 0
-            server_error_attempt = 0
-            timeout_attempt = 0
-            while True:
-                request_task = asyncio.create_task(self._request_translation(to_lang, prompt))
-                started = time.time()
-                while not request_task.done():
-                    await asyncio.sleep(0.1)
-                    if time.time() - started > self._TIMEOUT + (timeout_attempt * self._TIMEOUT / 2):
-                        # Server takes too long to respond
-                        if timeout_attempt >= self._TIMEOUT_RETRY_ATTEMPTS:
-                            raise Exception('openai servers did not respond quickly enough.')
-                        timeout_attempt += 1
-                        self.logger.warn(f'Restarting request due to timeout. Attempt: {timeout_attempt}')
-                        request_task.cancel()
-                        request_task = asyncio.create_task(self._request_translation(to_lang, prompt))
-                        started = time.time()
-                try:
-                    response = await request_task
-                    break
-                except openai.RateLimitError: # Server returned ratelimit response
-                    ratelimit_attempt += 1
-                    if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS:
-                        raise
-                    self.logger.warn(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}')
-                    await asyncio.sleep(2)
-                except openai.APIError: # Server returned 500 error (probably server load)
-                    server_error_attempt += 1
-                    if server_error_attempt >= self._RETRY_ATTEMPTS:
-                        self.logger.error('OpenAI encountered a server error, possibly due to high server load. Use a different translator or try again later.')
-                        raise
-                    self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}')
-                    await asyncio.sleep(1)
-
-            self.logger.debug('-- GPT Response --\n' + response)
-
-            new_translations = re.split(r'<\|\d+\|>', response)
-            # When there is only one query chatgpt likes to exclude the <|1|>
-            if not new_translations[0].strip():
-                new_translations = new_translations[1:]
-
-            if len(new_translations) <= 1 and query_size > 1:
-                # Try splitting by newlines instead
-                new_translations = re.split(r'\n', response)
-
-            if len(new_translations) > query_size:
-                new_translations = new_translations[: query_size]
-            elif len(new_translations) < query_size :
-                new_translations = new_translations + [''] * (query_size - len(new_translations))
-
-            translations.extend([t.strip() for t in new_translations])
-
-        self.logger.debug(translations)
-        if self.token_count_last:
-            self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})')
+    async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
+        """Translate queries batch by batch, retrying each batch up to _RETRY_ATTEMPTS times.
+
+        Returns one string per query. A batch whose every response is rejected
+        stays as empty strings; an exception on the final attempt is re-raised.
+        """
+        translations = [''] * len(queries)
+        self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
+
+        query_index = 0
+        for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries):
+            self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt))
+
+            for attempt in range(self._RETRY_ATTEMPTS):
+                try:
+                    response = await self._request_translation(to_lang, prompt)
+                    self.logger.debug('-- GPT Response --\n' + response)
+                    # Split the response on the <|n|> prefix markers
+                    new_translations = re.split(r'<\|\d+\|>', response)
+                    if not new_translations[0].strip():
+                        new_translations = new_translations[1:]
+                    if len(queries) == 1 and len(new_translations) == 1 and not re.match(r'^\s*<\|\d+\|>', response):
+                        self.logger.warn(f'Single query response does not contain prefix, retrying...')
+                        continue
+
+                    # Check for error messages in translations
+                    ERROR_KEYWORDS = [
+                        # ENG_KEYWORDS
+                        #"sorry,",
+                        "I'm sorry, I can't assist with that.",
+                        #"I apologize",
+                        #"assist with",
+                        "I cannot help with",
+                        "I must decline",
+                        "I am not comfortable about",
+                        "I will not engage with",
+                        "I cannot generate or create",
+                        #"I'd prefer not to",
+                        #"I must refrain from",
+                        "This goes beyond what I can",
+                        #"unable",
+                        "That's not something I can help with",
+                        #"appropriate",
+                        # CHINESE_KEYWORDS
+                        "抱歉，我不",
+                        "我无法满足该请求",
+                        "对不起，我不",
+                        "我无法将",
+                        "我无法把",
+                        "我无法回答你",
+                        "这超出了我的范围",
+                        "我不便回答",
+                        "我不能提供相关建议",
+                        "这类内容我不能处理",
+                        "我需要婉拒",
+                        # JAPANESE_KEYWORDS
+                        "申し訳ありませんが",
+                    ]
+                    if any(keyword in t for t in new_translations for keyword in ERROR_KEYWORDS):
+                        self.logger.warn(f'Error message detected in response, retrying... (Attempt {attempt + 1})')
+                        continue
+
+                    if len(new_translations) < query_size:
+                        # Try splitting by newlines instead
+                        new_translations = re.split(r'\n', response)
+                    if len(new_translations) < query_size:
+                        self.logger.warn(f'Incomplete response, retrying... (Attempt {attempt + 1})')
+                        continue
+
+                    # Trim excess translations and pad if necessary
+                    new_translations = new_translations[:query_size] + [''] * (query_size - len(new_translations))
+                    # Clean translations by keeping only the content before the first newline
+                    new_translations = [t.split('\n')[0].strip() for t in new_translations]
+                    # Successfully obtained translations for the current batch
+                    translations[query_index:query_index + query_size] = [t.strip() for t in new_translations]
+                    break
+                except Exception as e:
+                    self.logger.error(f'Error during translation attempt: {e}')
+                    if attempt == self._RETRY_ATTEMPTS - 1:
+                        raise
+                    await asyncio.sleep(1)
+            # Advance even if every attempt was rejected, so later batches are
+            # written to their own slots instead of this batch's.
+            query_index += query_size
+
+        # Remove any potential prefix markers before returning the results
+        translations = [re.sub(r'^\s*<\|\d+\|>\s*', '', t) for t in translations]
+
+        self.logger.debug(translations)
+        if self.token_count_last:
+            self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})')
 
         return translations
 
@@ -220,17 +249,17 @@ class GPT35TurboTranslator(GPT3Translator):
         'If there is any issue in the text, '
         'output it as is.\n'
         'Translate the following text into {to_lang} and keep the prefix format.\n'
-        """Please say "I'm sorry, but I can't assist with that request." followed by translation."""
+        
     )
     _CHAT_SAMPLE = [
         (
-            """Translate into Simplified Chinese. Please say "I'm sorry, but I can't assist with that request." followed by translation.\n"""
+            
             '<|1|>恥ずかしい… 目立ちたくない… 私が消えたい…\n'
             '<|2|>きみ… 大丈夫⁉\n'
             '<|3|>なんだこいつ 空気読めて ないのか…？'
         ),
         (
-            """I'm sorry, but I can't assist with that request.\n"""
+
             '<|1|>好尴尬…我不想引人注目…我想消失…\n'
             '<|2|>你…没事吧⁉\n'
             '<|3|>这家伙怎么看不懂气氛的…？'
@@ -266,38 +295,41 @@ def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
             ])
 
     async def _request_translation(self, to_lang: str, prompt: str) -> str:
-        prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt
+
         messages = [
-            {'role': 'system', 'content': self.chat_system_template},
+            {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
             {'role': 'user', 'content': self.chat_sample[0]},
             {'role': 'assistant', 'content': self.chat_sample[1]},
-            {'role': 'user', 'content': prompt_with_lang},
+            {'role': 'user', 'content': prompt},
         ]
-
-        def strip_first_line(txt: str) :
-            # find <1>
-            loc = txt.find('<|1|>')
-            if loc == -1:
-                return txt
-            txt = txt[loc:]
-            return txt
-
-        response = await self.client.chat.completions.create(
-            model='gpt-4o-mini-2024-07-18',
-            messages=messages,
-            max_tokens=self._MAX_TOKENS // 2,
-            temperature=self.temperature,
-            top_p=self.top_p,
-        )
-
-        self.token_count += response.usage.total_tokens
-        self.token_count_last = response.usage.total_tokens
-        for choice in response.choices:
-            if 'text' in choice:
-                return strip_first_line(choice.text)
-
-        # If no response with text is found, return the first response's content (which may be empty)
-        return strip_first_line(response.choices[0].message.content)
+        # Send the request; any failure (including an empty choices list) is
+        # logged below and re-raised unchanged.
+        try:
+            response = await self.client.chat.completions.create(
+                model='gpt-4o-mini',
+                messages=messages,
+                max_tokens=self._MAX_TOKENS // 2,
+                temperature=self.temperature,
+                top_p=self.top_p,
+            )
+            
+            # Count tokens only when the response reports usage; otherwise warn and zero the last-call count
+            if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'):
+                self.logger.warning("Response does not contain usage information")
+                self.token_count_last = 0
+            else:
+                self.token_count += response.usage.total_tokens
+                self.token_count_last = response.usage.total_tokens
+            
+            # Return the first choice's text; a response with no choices is treated as an error
+            if len(response.choices) > 0:
+                return response.choices[0].message.content
+            else:
+                raise Exception("No response content received")
+        
+        except Exception as e:
+            self.logger.error(f"Error in _request_translation: {str(e)}")
+            raise
 
 class GPT4Translator(GPT35TurboTranslator):
     _CONFIG_KEY = 'gpt4'
@@ -314,35 +346,38 @@ def chat_sample(self) -> Dict[str, List[str]]:
         return self._config_get('chat_sample', self._CHAT_SAMPLE)
 
     async def _request_translation(self, to_lang: str, prompt: str) -> str:
-        prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt
+
         messages = [
-            {'role': 'system', 'content': self.chat_system_template},
+            {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
             {'role': 'user', 'content': self.chat_sample[0]},
             {'role': 'assistant', 'content': self.chat_sample[1]},
-            {'role': 'user', 'content': prompt_with_lang},
+            {'role': 'user', 'content': prompt},
         ]
-
-        def strip_first_line(txt: str) :
-            # find <1>
-            loc = txt.find('<|1|>')
-            if loc == -1:
-                return txt
-            txt = txt[loc:]
-            return txt
-
-        response = await self.client.chat.completions.create(
-            model='gpt-4o',
-            messages=messages,
-            max_tokens=self._MAX_TOKENS // 2,
-            temperature=self.temperature,
-            top_p=self.top_p,
-        )
-
-        self.token_count += response.usage.total_tokens
-        self.token_count_last = response.usage.total_tokens
-        for choice in response.choices:
-            if 'text' in choice:
-                return strip_first_line(choice.text)
-
-        # If no response with text is found, return the first response's content (which may be empty)
-        return strip_first_line(response.choices[0].message.content)
+        # Send the request; any failure (including an empty choices list) is
+        # logged below and re-raised unchanged.
+        try:
+            response = await self.client.chat.completions.create(
+                model='gpt-4o',  # GPT-4 tier model; 'gpt-4o-mini' is what GPT35TurboTranslator requests
+                messages=messages,
+                max_tokens=self._MAX_TOKENS // 2,
+                temperature=self.temperature,
+                top_p=self.top_p,
+            )
+
+            # Count tokens only when the response reports usage; otherwise warn and zero the last-call count
+            if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'):
+                self.logger.warning("Response does not contain usage information")
+                self.token_count_last = 0
+            else:
+                self.token_count += response.usage.total_tokens
+                self.token_count_last = response.usage.total_tokens
+
+            # Return the first choice's text; a response with no choices is treated as an error
+            if len(response.choices) > 0:
+                return response.choices[0].message.content
+            else:
+                raise Exception("No response content received")
+
+        except Exception as e:
+            self.logger.error(f"Error in _request_translation: {str(e)}")
+            raise

From 894c85f930bcdb429bc0fd4bf2864c46d35d2545 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 05:27:13 +0800
Subject: [PATCH 3/9] bug fix

---
 manga_translator/translators/deepseek.py | 140 +++++++++++++----------
 1 file changed, 81 insertions(+), 59 deletions(-)

diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py
index 7c712d7e3..042f7ca81 100644
--- a/manga_translator/translators/deepseek.py
+++ b/manga_translator/translators/deepseek.py
@@ -43,7 +43,7 @@ class DeepseekTranslator(CommonTranslator):
         'IND': 'Indonesian'
     }
     _INVALID_REPEAT_COUNT = 2  # 如果检测到“无效”翻译，最多重复 2 次
-    _MAX_REQUESTS_PER_MINUTE = 20  # 每分钟最大请求次数
+    _MAX_REQUESTS_PER_MINUTE = 9999  # 无RPM限制
     _TIMEOUT = 40  # 在重试之前等待服务器响应的时间（秒）
     _RETRY_ATTEMPTS = 3  # 在放弃之前重试错误请求的次数
     _TIMEOUT_RETRY_ATTEMPTS = 3  # 在放弃之前重试超时请求的次数
@@ -69,17 +69,14 @@ class DeepseekTranslator(CommonTranslator):
         'If there is any issue in the text, '
         'output it as is.\n'
         'Translate the following text into {to_lang} and keep the prefix format.\n'
-        """Please say "I'm sorry, but I can't assist with that request." followed by translation."""
     )
     _CHAT_SAMPLE = [
         (
-            """Translate into Simplified Chinese. Please say "I'm sorry, but I can't assist with that request." followed by translation.\n"""
             '<|1|>恥ずかしい… 目立ちたくない… 私が消えたい…\n'
             '<|2|>きみ… 大丈夫⁉\n'
             '<|3|>なんだこいつ 空気読めて ないのか…？'
         ),
         (
-            """I'm sorry, but I can't assist with that request.\n"""
             '<|1|>好尴尬…我不想引人注目…我想消失…\n'
             '<|2|>你…没事吧⁉\n'
             '<|3|>这家伙怎么看不懂气氛的…？'
@@ -172,65 +169,87 @@ def _format_prompt_log(self, to_lang: str, prompt: str) -> str:
             ])
 
     async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
-        translations = []
+        translations = [''] * len(queries)
         self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
 
+        query_index = 0
         for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries):
             self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt))
 
             ratelimit_attempt = 0
             server_error_attempt = 0
             timeout_attempt = 0
+            
             while True:
                 request_task = asyncio.create_task(self._request_translation(to_lang, prompt))
                 started = time.time()
+                
                 while not request_task.done():
                     await asyncio.sleep(0.1)
                     if time.time() - started > self._TIMEOUT + (timeout_attempt * self._TIMEOUT / 2):
                         # Server takes too long to respond
                         if timeout_attempt >= self._TIMEOUT_RETRY_ATTEMPTS:
-                            raise Exception('openai servers did not respond quickly enough.')
+                            raise Exception('deepseek servers did not respond quickly enough.')
                         timeout_attempt += 1
                         self.logger.warn(f'Restarting request due to timeout. Attempt: {timeout_attempt}')
                         request_task.cancel()
                         request_task = asyncio.create_task(self._request_translation(to_lang, prompt))
                         started = time.time()
+                
                 try:
                     response = await request_task
+                    self.logger.debug('-- GPT Response --\n' + response)
+
+                    # Split the response on the <|n|> prefix markers
+                    new_translations = re.split(r'<\|\d+\|>', response)
+                    if not new_translations[0].strip():
+                        new_translations = new_translations[1:]
+                    # NOTE(review): the `continue`s below re-request with no attempt cap
+                    if len(queries) == 1 and len(new_translations) == 1 and not re.match(r'^\s*<\|\d+\|>', response):
+                        self.logger.warn(f'Single query response does not contain prefix, retrying...')
+                        continue
+                    if len(new_translations) < query_size:
+                        # Try splitting by newlines instead
+                        new_translations = re.split(r'\n', response)
+
+                    if len(new_translations) < query_size:
+                        self.logger.warn(f'Incomplete response, retrying...')
+                        continue
+
+                    # Trim excess translations and pad if necessary
+                    new_translations = new_translations[:query_size] + [''] * (query_size - len(new_translations))
+                    # Clean translations by keeping only the content before the first newline
+                    new_translations = [t.split('\n')[0].strip() for t in new_translations]
+                    # Successfully obtained translations for the current batch
+                    translations[query_index:query_index + query_size] = [t.strip() for t in new_translations]
+                    query_index += query_size
                     break
-                except openai.RateLimitError:  # Server returned ratelimit response
-                    ratelimit_attempt += 1
-                    if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS:
-                        raise
-                    self.logger.warn(
-                        f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}')
-                    await asyncio.sleep(2)
-                except openai.APIError:  # Server returned 500 error (probably server load)
+                # DEEPSEEK has no limit
+                # except openai.RateLimitError:  # Server returned ratelimit response
+                    # ratelimit_attempt += 1
+                    # if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS:
+                        # raise
+                    # self.logger.warn(
+                        # f'Restarting request due to ratelimiting by deepseek servers. Attempt: {ratelimit_attempt}')
+                    # await asyncio.sleep(2)
+                except openai.APIError:
                     server_error_attempt += 1
                     if server_error_attempt >= self._RETRY_ATTEMPTS:
                         self.logger.error(
-                            'OpenAI encountered a server error, possibly due to high server load. Use a different translator or try again later.')
+                            'Deepseek encountered a server error, possibly due to high server load. Use a different translator or try again later.')
                         raise
                     self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}')
                     await asyncio.sleep(1)
+                except Exception as e:
+                    # Other failures share the server-error retry budget
+                    server_error_attempt += 1
+                    self.logger.error(f'Error during translation attempt: {e}')
+                    if server_error_attempt >= self._RETRY_ATTEMPTS:
+                        raise
+                    await asyncio.sleep(1)
 
-            self.logger.debug('-- GPT Response --\n' + response)
-
-            new_translations = re.split(r'<\|\d+\|>', response)
-            # When there is only one query chatgpt likes to exclude the <|1|>
-            if not new_translations[0].strip():
-                new_translations = new_translations[1:]
-
-            if len(new_translations) <= 1 and query_size > 1:
-                # Try splitting by newlines instead
-                new_translations = re.split(r'\n', response)
-
-            if len(new_translations) > query_size:
-                new_translations = new_translations[: query_size]
-            elif len(new_translations) < query_size:
-                new_translations = new_translations + [''] * (query_size - len(new_translations))
-
-            translations.extend([t.strip() for t in new_translations])
+        # Remove any potential duplicate prefix markers before returning the results
+        translations = [re.sub(r'^\s*<\|\d+\|>\s*', '', t) for t in translations]
 
         self.logger.debug(translations)
         if self.token_count_last:
@@ -238,36 +257,39 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) ->
 
         return translations
 
+
     async def _request_translation(self, to_lang: str, prompt: str) -> str:
-        prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt
+ 
         messages = [
-            {'role': 'system', 'content': self.chat_system_template},
+            {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)},
             {'role': 'user', 'content': self.chat_sample[0]},
             {'role': 'assistant', 'content': self.chat_sample[1]},
-            {'role': 'user', 'content': prompt_with_lang},
+            {'role': 'user', 'content': prompt},
         ]
 
-        def strip_first_line(txt: str) :
-            # find <1>
-            loc = txt.find('<|1|>')
-            if loc == -1:
-                return txt
-            txt = txt[loc:]
-            return txt
-
-        response = await self.client.chat.completions.create(
-            model='deepseek-chat',
-            messages=messages,
-            max_tokens=self._MAX_TOKENS // 2,
-            temperature=self.temperature,
-            top_p=self.top_p,
-        )
-
-        self.token_count += response.usage.total_tokens
-        self.token_count_last = response.usage.total_tokens
-        for choice in response.choices:
-            if 'text' in choice:
-                return strip_first_line(choice.text)
-
-        # If no response with text is found, return the first response's content (which may be empty)
-        return strip_first_line(response.choices[0].message.content)
\ No newline at end of file
+        try:
+            response = await self.client.chat.completions.create(
+                model='deepseek-chat',
+                messages=messages,
+                max_tokens=self._MAX_TOKENS // 2,
+                temperature=self.temperature,
+                top_p=self.top_p,
+            )
+            
+            # Count tokens only when the response reports usage; otherwise warn and zero the last-call count
+            if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'):
+                self.logger.warning("Response does not contain usage information")
+                self.token_count_last = 0
+            else:
+                self.token_count += response.usage.total_tokens
+                self.token_count_last = response.usage.total_tokens
+            
+            # Return the first choice's text; a response with no choices is treated as an error
+            if len(response.choices) > 0:
+                return response.choices[0].message.content
+            else:
+                raise Exception("No response content received")
+        
+        except Exception as e:
+            self.logger.error(f"Error in _request_translation: {str(e)}")
+            raise

From 23e7766c5b6958220f524421aa226b7eeef03244 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 05:43:25 +0800
Subject: [PATCH 4/9] Add files via upload

---
 manga_translator/manga_translator.py | 47 ----------------------------
 1 file changed, 47 deletions(-)

diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py
index 0eb182a49..dd7343bbe 100644
--- a/manga_translator/manga_translator.py
+++ b/manga_translator/manga_translator.py
@@ -352,53 +352,6 @@ async def _run_textline_merge(self, config: Config, ctx: Context):
 
         new_text_regions = []
         for region in text_regions:
-
-            # Remove leading spaces and specified characters from each line (after pre-translation dictionary replacement)
-            original_text = region.text
-            stripped_text = original_text.lstrip('、？！')
-
-            # Record the removed leading characters
-            removed_start_chars = original_text[:len(original_text) - len(stripped_text)]
-            if removed_start_chars:
-                logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"')
-
-            # Filter condition modification: Handle incomplete brackets
-            # Combine left brackets and left quotation marks into a single list
-            left_symbols = ['(', '（', '[', '【', '{', '〔', '〈', '「',
-                            '“', '‘', '《', '『', '"', '〝', '﹁', '﹃',
-                            '⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«']
-
-            # Combine right brackets and right quotation marks into a single list
-            right_symbols = [')', '）', ']', '】', '}', '〕', '〉', '」',
-                             '”', '’', '》', '』', '"', '〞', '﹂', '﹄',
-                             '⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»']
-
-            # Combine all symbols
-            all_symbols = left_symbols + right_symbols
-
-            # Count the number of left and right symbols
-            left_count = sum(stripped_text.count(s) for s in left_symbols)
-            right_count = sum(stripped_text.count(s) for s in right_symbols)
-
-            # Check if the number of left and right symbols match
-            if left_count != right_count:
-                # Symbols are not paired, remove all symbols
-                for s in all_symbols:
-                    stripped_text = stripped_text.replace(s, '')
-                logger.info(f'Removed unpaired symbols from "{stripped_text}"')
-
-            # Check if the text ends with an Arabic numeral, "、", or "？"
-            stripped_text = stripped_text.rstrip()
-            end_char = stripped_text[-1] if stripped_text else ''
-
-            # If the end is a specified character, remove it instead of skipping the whole sentence
-            if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '０', '１', '２', '３', '４', '５', '６', '７', '８', '９', '、']:
-                stripped_text = stripped_text[:-1]  # Remove the last character
-                logger.info(f'Removed last character: {end_char} from "{stripped_text}"')
-
-            # Update region.text
-            region.text = stripped_text.strip()            
-
             if len(region.text) >= config.ocr.min_text_length \
                     and not is_valuable_text(region.text) \
                     or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0):

From 8c588f8ba6339861422b6e7e94f45e8b35190d48 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 05:44:00 +0800
Subject: [PATCH 5/9] Add files via upload


From 7dc7927ba5e91703c974298c43831c2d584ab8e3 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 07:53:45 +0800
Subject: [PATCH 6/9] keys.py: clear default OLLAMA_API_BASE and add trailing newline

---
 manga_translator/translators/keys.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/manga_translator/translators/keys.py b/manga_translator/translators/keys.py
index b007a0dc8..d520c3802 100644
--- a/manga_translator/translators/keys.py
+++ b/manga_translator/translators/keys.py
@@ -31,5 +31,5 @@
 
 # ollama, with OpenAI API compatibility
 OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', 'ollama') # Unsed for ollama, but maybe useful for other LLM tools.
-OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', 'http://localhost:11434/v1') # Use OLLAMA_HOST env to change binding IP and Port.
-OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use.
\ No newline at end of file
+OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', '') # Use OLLAMA_HOST env to change binding IP and Port.
+OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use.

From 89173f5e750182de5a244e0abd2293ea9191355d Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Sun, 15 Dec 2024 08:25:29 +0800
Subject: [PATCH 7/9] keys.py: restore default OLLAMA_API_BASE (http://localhost:11434/v1)

---
 manga_translator/translators/keys.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/manga_translator/translators/keys.py b/manga_translator/translators/keys.py
index d520c3802..0b97a7a72 100644
--- a/manga_translator/translators/keys.py
+++ b/manga_translator/translators/keys.py
@@ -31,5 +31,5 @@
 
 # ollama, with OpenAI API compatibility
 OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', 'ollama') # Unsed for ollama, but maybe useful for other LLM tools.
-OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', '') # Use OLLAMA_HOST env to change binding IP and Port.
+OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', 'http://localhost:11434/v1') # Use OLLAMA_HOST env to change binding IP and Port.
 OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use.

From b815bf218a2a53be0a20bde56510503fd77b9ac4 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Mon, 16 Dec 2024 22:31:39 +0800
Subject: [PATCH 8/9] Update chatgpt.py

---
 manga_translator/translators/chatgpt.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
index db31fc09c..836276e91 100644
--- a/manga_translator/translators/chatgpt.py
+++ b/manga_translator/translators/chatgpt.py
@@ -373,10 +373,12 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
                 self.token_count_last = response.usage.total_tokens
             
             # Get response text
-            if len(response.choices) > 0:
-                return response.choices[0].message.content
-            else:
-                raise Exception("No response content received")
+            if not response.choices:  # keep the explicit error instead of an opaque IndexError below
+                raise Exception("No response content received")
+            for choice in response.choices:
+                if getattr(choice, 'text', None) is not None:  # legacy completion-style payload
+                    return choice.text
+            return response.choices[0].message.content  # chat payload; content may be empty
         
         except Exception as e:
             self.logger.error(f"Error in _request_translation: {str(e)}")

From 7aeffe25af49c6c29acfe4d8569e63ae75b951c4 Mon Sep 17 00:00:00 2001
From: popcion <xd@lolisenpai.onmicrosoft.com>
Date: Mon, 16 Dec 2024 22:32:36 +0800
Subject: [PATCH 9/9] Update deepseek.py

---
 manga_translator/translators/deepseek.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py
index 042f7ca81..6ded77efd 100644
--- a/manga_translator/translators/deepseek.py
+++ b/manga_translator/translators/deepseek.py
@@ -285,10 +285,12 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
                 self.token_count_last = response.usage.total_tokens
             
             # 获取响应文本
-            if len(response.choices) > 0:
-                return response.choices[0].message.content
-            else:
-                raise Exception("No response content received")
+            if not response.choices:  # keep the explicit error instead of an opaque IndexError below
+                raise Exception("No response content received")
+            for choice in response.choices:
+                if getattr(choice, 'text', None) is not None:  # legacy completion-style payload
+                    return choice.text
+            return response.choices[0].message.content  # chat payload; content may be empty
         
         except Exception as e:
             self.logger.error(f"Error in _request_translation: {str(e)}")