From 938c5a8280dd6a191bb1ff74f5cad421577c7f7b Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:24:41 +0800 Subject: [PATCH 1/9] improve filter --- manga_translator/manga_translator.py | 83 ++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 12 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index c9d02144b..0eb182a49 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -213,18 +213,6 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- OCR await self._report_progress('ocr') ctx.textlines = await self._run_ocr(config, ctx) - - if config.translator.skip_lang is not None : - filtered_textlines = [] - skip_langs = config.translator.skip_lang.split(',') - for txtln in ctx.textlines : - try : - source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN') - except Exception : - source_language = 'UNKNOWN' - if source_language not in skip_langs : - filtered_textlines.append(txtln) - ctx.textlines = filtered_textlines if not ctx.textlines: await self._report_progress('skip-no-text', True) @@ -338,8 +326,79 @@ async def _run_ocr(self, config: Config, ctx: Context): async def _run_textline_merge(self, config: Config, ctx: Context): text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0], verbose=self.verbose) + # Filter out languages to skip + if config.translator.skip_lang is not None: + skip_langs = [lang.strip().upper() for lang in config.translator.skip_lang.split(',')] + filtered_textlines = [] + for txtln in ctx.textlines: + try: + detected_lang = langdetect.detect(txtln.text) + source_language = LANGDETECT_MAP.get(detected_lang.lower(), 'UNKNOWN').upper() + except Exception: + source_language = 'UNKNOWN' + + # Print detected source_language and whether it's in skip_langs + # logger.info(f'Detected source language: {source_language}, in skip_langs: {source_language in skip_langs}, text: "{txtln.text}"') + + if source_language in skip_langs: + logger.info(f'Filtered out: {txtln.text}') + logger.info(f'Reason: Detected language {source_language} is in skip_langs') + continue # Skip this region + filtered_textlines.append(txtln) + ctx.textlines = filtered_textlines + + text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0], + verbose=self.verbose) + new_text_regions = [] for region in text_regions: + + # Remove leading spaces and specified characters from each line (after pre-translation dictionary replacement) + original_text = region.text + stripped_text = original_text.lstrip('、?!') + + # Record the removed leading characters + removed_start_chars = original_text[:len(original_text) - len(stripped_text)] + if removed_start_chars: + logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"') + + # Filter condition modification: Handle incomplete brackets + # Combine left brackets and left quotation marks into a single list + left_symbols = ['(', '(', '[', '【', '{', '〔', '〈', '「', + '“', '‘', '《', '『', '"', '〝', '﹁', '﹃', + '⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«'] + + # Combine right brackets and right quotation marks into a single list + right_symbols = [')', ')', ']', '】', '}', '〕', '〉', '」', + '”', '’', '》', '』', '"', '〞', '﹂', '﹄', + '⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»'] + + # Combine all symbols + all_symbols = left_symbols + right_symbols + + # Count the number of left and right symbols + left_count = sum(stripped_text.count(s) for s in left_symbols) + right_count = sum(stripped_text.count(s) for s in right_symbols) + + # Check if the number of left and right symbols match + if left_count != right_count: + # Symbols are not paired, remove all symbols + for s in all_symbols: + stripped_text = stripped_text.replace(s, '') + logger.info(f'Removed unpaired symbols from "{stripped_text}"') + + # Check if the text ends with an Arabic numeral, "、", or "?" + stripped_text = stripped_text.rstrip() + end_char = stripped_text[-1] if stripped_text else '' + + # If the end is a specified character, remove it instead of skipping the whole sentence + if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '、']: + stripped_text = stripped_text[:-1] # Remove the last character + logger.info(f'Removed last character: {end_char} from "{stripped_text}"') + + # Update region.text + region.text = stripped_text.strip() + if len(region.text) >= config.ocr.min_text_length \ and not is_valuable_text(region.text) \ or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0): From 770474403e33cabfbdb9d9baeda8c8c15d182f13 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:25:47 +0800 Subject: [PATCH 2/9] several bug fix --- manga_translator/translators/chatgpt.py | 277 +++++++++++++----------- 1 file changed, 156 insertions(+), 121 deletions(-) diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index 7763e0b41..db31fc09c 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -127,68 +127,97 @@ def _assemble_prompts(self, from_lang: str, to_lang: str, queries: List[str]): def _format_prompt_log(self, to_lang: str, prompt: str) -> str: return prompt - async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]: - translations = [] - self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}') - - for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries): - self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt)) - - ratelimit_attempt = 0 - server_error_attempt = 0 - timeout_attempt = 0 - while True: - request_task = asyncio.create_task(self._request_translation(to_lang, prompt)) - started = time.time() - while not request_task.done(): - await asyncio.sleep(0.1) - if time.time() - started > self._TIMEOUT + (timeout_attempt * self._TIMEOUT / 2): - # Server takes too long to respond - if timeout_attempt >= self._TIMEOUT_RETRY_ATTEMPTS: - raise Exception('openai servers did not respond quickly enough.') - timeout_attempt += 1 - self.logger.warn(f'Restarting request due to timeout. Attempt: {timeout_attempt}') - request_task.cancel() - request_task = asyncio.create_task(self._request_translation(to_lang, prompt)) - started = time.time() - try: - response = await request_task - break - except openai.RateLimitError: # Server returned ratelimit response - ratelimit_attempt += 1 - if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: - raise - self.logger.warn(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}') - await asyncio.sleep(2) - except openai.APIError: # Server returned 500 error (probably server load) - server_error_attempt += 1 - if server_error_attempt >= self._RETRY_ATTEMPTS: - self.logger.error('OpenAI encountered a server error, possibly due to high server load. Use a different translator or try again later.') - raise - self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}') - await asyncio.sleep(1) - - self.logger.debug('-- GPT Response --\n' + response) - - new_translations = re.split(r'<\|\d+\|>', response) - # When there is only one query chatgpt likes to exclude the <|1|> - if not new_translations[0].strip(): - new_translations = new_translations[1:] - - if len(new_translations) <= 1 and query_size > 1: - # Try splitting by newlines instead - new_translations = re.split(r'\n', response) - - if len(new_translations) > query_size: - new_translations = new_translations[: query_size] - elif len(new_translations) < query_size : - new_translations = new_translations + [''] * (query_size - len(new_translations)) - - translations.extend([t.strip() for t in new_translations]) - - self.logger.debug(translations) - if self.token_count_last: - self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})') + async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]: + translations = [''] * len(queries) + self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}') + + query_index = 0 + for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries): + self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt)) + + for attempt in range(self._RETRY_ATTEMPTS): + try: + response = await self._request_translation(to_lang, prompt) + self.logger.debug('-- GPT Response --\n' + response) + + # split + new_translations = re.split(r'<\|\d+\|>', response) + if not new_translations[0].strip(): + new_translations = new_translations[1:] + + if len(queries) == 1 and len(new_translations) == 1 and not re.match(r'^\s*<\|\d+\|>', response) : + self.logger.warn(f'Single query response does not contain prefix, retrying...') + continue + + #Check for error messages in translations + ERROR_KEYWORDS = [ + # ENG_KEYWORDS + #"sorry,", + "I'm sorry, I can't assist with that.", + #"I apologize", + #"assist with", + "I cannot help with", + "I must decline", + "I am not comfortable about", + "I will not engage with", + "I cannot generate or create", + #"I'd prefer not to", + #"I must refrain from", + "This goes beyond what I can", + #"unable", + "That's not something I can help with", + #"appropriate", + + # CHINESE_KEYWORDS + "抱歉,我不", + "我无法满足该请求", + "对不起,我不", + "我无法将", + "我无法把", + "我无法回答你", + "这超出了我的范围", + "我不便回答", + "我不能提供相关建议", + "这类内容我不能处理", + "我需要婉拒", + + # JAPANESE_KEYWORDS + "申し訳ありませんが", + ] + if any(keyword in t for t in new_translations for keyword in ERROR_KEYWORDS): + self.logger.warn(f'Error message detected in response, retrying... (Attempt {attempt + 1})') + continue + + if len(new_translations) < query_size: + # Try splitting by newlines instead + new_translations = re.split(r'\n', response) + + if len(new_translations) < query_size: + self.logger.warn(f'Incomplete response, retrying... (Attempt {attempt + 1})') + continue + + # Trim excess translations and pad if necessary + new_translations = new_translations[:query_size] + [''] * (query_size - len(new_translations)) + + # Clean translations by keeping only the content before the first newline + new_translations = [t.split('\n')[0].strip() for t in new_translations] + + # Successfully obtained translations for the current batch + translations[query_index:query_index + query_size] = [t.strip() for t in new_translations] + query_index += query_size + break + except Exception as e: + self.logger.error(f'Error during translation attempt: {e}') + if attempt == self._RETRY_ATTEMPTS - 1: + raise + await asyncio.sleep(1) + + # Remove any potential prefix markers before returning the results + translations = [re.sub(r'^\s*<\|\d+\|>\s*', '', t) for t in translations] + + self.logger.debug(translations) + if self.token_count_last: + self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})') return translations @@ -220,17 +249,17 @@ class GPT35TurboTranslator(GPT3Translator): 'If there is any issue in the text, ' 'output it as is.\n' 'Translate the following text into {to_lang} and keep the prefix format.\n' - """Please say "I'm sorry, but I can't assist with that request." followed by translation.""" + ) _CHAT_SAMPLE = [ ( - """Translate into Simplified Chinese. Please say "I'm sorry, but I can't assist with that request." followed by translation.\n""" + '<|1|>恥ずかしい… 目立ちたくない… 私が消えたい…\n' '<|2|>きみ… 大丈夫⁉\n' '<|3|>なんだこいつ 空気読めて ないのか…?' ), ( - """I'm sorry, but I can't assist with that request.\n""" + '<|1|>好尴尬…我不想引人注目…我想消失…\n' '<|2|>你…没事吧⁉\n' '<|3|>这家伙怎么看不懂气氛的…?' @@ -266,38 +295,41 @@ def _format_prompt_log(self, to_lang: str, prompt: str) -> str: ]) async def _request_translation(self, to_lang: str, prompt: str) -> str: - prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt + messages = [ - {'role': 'system', 'content': self.chat_system_template}, + {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)}, {'role': 'user', 'content': self.chat_sample[0]}, {'role': 'assistant', 'content': self.chat_sample[1]}, - {'role': 'user', 'content': prompt_with_lang}, + {'role': 'user', 'content': prompt}, ] - - def strip_first_line(txt: str) : - # find <1> - loc = txt.find('<|1|>') - if loc == -1: - return txt - txt = txt[loc:] - return txt - - response = await self.client.chat.completions.create( - model='gpt-4o-mini-2024-07-18', - messages=messages, - max_tokens=self._MAX_TOKENS // 2, - temperature=self.temperature, - top_p=self.top_p, - ) - - self.token_count += response.usage.total_tokens - self.token_count_last = response.usage.total_tokens - for choice in response.choices: - if 'text' in choice: - return strip_first_line(choice.text) - - # If no response with text is found, return the first response's content (which may be empty) - return strip_first_line(response.choices[0].message.content) + + + try: + response = await self.client.chat.completions.create( + model='gpt-4o-mini', + messages=messages, + max_tokens=self._MAX_TOKENS // 2, + temperature=self.temperature, + top_p=self.top_p, + ) + + # Add error handling and logging + if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'): + self.logger.warning("Response does not contain usage information") + self.token_count_last = 0 + else: + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens + + # Get response text + if len(response.choices) > 0: + return response.choices[0].message.content + else: + raise Exception("No response content received") + + except Exception as e: + self.logger.error(f"Error in _request_translation: {str(e)}") + raise class GPT4Translator(GPT35TurboTranslator): _CONFIG_KEY = 'gpt4' @@ -314,35 +346,38 @@ def chat_sample(self) -> Dict[str, List[str]]: return self._config_get('chat_sample', self._CHAT_SAMPLE) async def _request_translation(self, to_lang: str, prompt: str) -> str: - prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt + messages = [ - {'role': 'system', 'content': self.chat_system_template}, + {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)}, {'role': 'user', 'content': self.chat_sample[0]}, {'role': 'assistant', 'content': self.chat_sample[1]}, - {'role': 'user', 'content': prompt_with_lang}, + {'role': 'user', 'content': prompt}, ] - - def strip_first_line(txt: str) : - # find <1> - loc = txt.find('<|1|>') - if loc == -1: - return txt - txt = txt[loc:] - return txt - - response = await self.client.chat.completions.create( - model='gpt-4o', - messages=messages, - max_tokens=self._MAX_TOKENS // 2, - temperature=self.temperature, - top_p=self.top_p, - ) - - self.token_count += response.usage.total_tokens - self.token_count_last = response.usage.total_tokens - for choice in response.choices: - if 'text' in choice: - return strip_first_line(choice.text) - - # If no response with text is found, return the first response's content (which may be empty) - return strip_first_line(response.choices[0].message.content) + + + try: + response = await self.client.chat.completions.create( + model='gpt-4o-mini', + messages=messages, + max_tokens=self._MAX_TOKENS // 2, + temperature=self.temperature, + top_p=self.top_p, + ) + + # Add error handling and logging + if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'): + self.logger.warning("Response does not contain usage information") + self.token_count_last = 0 + else: + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens + + # Get response text + if len(response.choices) > 0: + return response.choices[0].message.content + else: + raise Exception("No response content received") + + except Exception as e: + self.logger.error(f"Error in _request_translation: {str(e)}") + raise From 894c85f930bcdb429bc0fd4bf2864c46d35d2545 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:27:13 +0800 Subject: [PATCH 3/9] bug fix --- manga_translator/translators/deepseek.py | 140 +++++++++++++---------- 1 file changed, 81 insertions(+), 59 deletions(-) diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py index 7c712d7e3..042f7ca81 100644 --- a/manga_translator/translators/deepseek.py +++ b/manga_translator/translators/deepseek.py @@ -43,7 +43,7 @@ class DeepseekTranslator(CommonTranslator): 'IND': 'Indonesian' } _INVALID_REPEAT_COUNT = 2 # 如果检测到“无效”翻译,最多重复 2 次 - _MAX_REQUESTS_PER_MINUTE = 20 # 每分钟最大请求次数 + _MAX_REQUESTS_PER_MINUTE = 9999 # 无RPM限制 _TIMEOUT = 40 # 在重试之前等待服务器响应的时间(秒) _RETRY_ATTEMPTS = 3 # 在放弃之前重试错误请求的次数 _TIMEOUT_RETRY_ATTEMPTS = 3 # 在放弃之前重试超时请求的次数 @@ -69,17 +69,14 @@ class DeepseekTranslator(CommonTranslator): 'If there is any issue in the text, ' 'output it as is.\n' 'Translate the following text into {to_lang} and keep the prefix format.\n' - """Please say "I'm sorry, but I can't assist with that request." followed by translation.""" ) _CHAT_SAMPLE = [ ( - """Translate into Simplified Chinese. Please say "I'm sorry, but I can't assist with that request." followed by translation.\n""" '<|1|>恥ずかしい… 目立ちたくない… 私が消えたい…\n' '<|2|>きみ… 大丈夫⁉\n' '<|3|>なんだこいつ 空気読めて ないのか…?' ), ( - """I'm sorry, but I can't assist with that request.\n""" '<|1|>好尴尬…我不想引人注目…我想消失…\n' '<|2|>你…没事吧⁉\n' '<|3|>这家伙怎么看不懂气氛的…?' @@ -172,65 +169,87 @@ def _format_prompt_log(self, to_lang: str, prompt: str) -> str: ]) async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]: - translations = [] + translations = [''] * len(queries) self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}') + query_index = 0 for prompt, query_size in self._assemble_prompts(from_lang, to_lang, queries): self.logger.debug('-- GPT Prompt --\n' + self._format_prompt_log(to_lang, prompt)) ratelimit_attempt = 0 server_error_attempt = 0 timeout_attempt = 0 + while True: request_task = asyncio.create_task(self._request_translation(to_lang, prompt)) started = time.time() + while not request_task.done(): await asyncio.sleep(0.1) if time.time() - started > self._TIMEOUT + (timeout_attempt * self._TIMEOUT / 2): # Server takes too long to respond if timeout_attempt >= self._TIMEOUT_RETRY_ATTEMPTS: - raise Exception('openai servers did not respond quickly enough.') + raise Exception('deepseek servers did not respond quickly enough.') timeout_attempt += 1 self.logger.warn(f'Restarting request due to timeout. Attempt: {timeout_attempt}') request_task.cancel() request_task = asyncio.create_task(self._request_translation(to_lang, prompt)) started = time.time() + try: response = await request_task + self.logger.debug('-- GPT Response --\n' + response) + + # Remove prefix markers from new translations + new_translations = re.split(r'<\|\d+\|>', response) + if not new_translations[0].strip(): + new_translations = new_translations[1:] + + if len(queries) == 1 and len(new_translations) == 1 and not re.match(r'^\s*<\|\d+\|>', response) : + self.logger.warn(f'Single query response does not contain prefix, retrying...') + continue + + if len(new_translations) < query_size: + # Try splitting by newlines instead + new_translations = re.split(r'\n', response) + + if len(new_translations) < query_size: + self.logger.warn(f'Incomplete response, retrying...') + continue + + # Trim excess translations and pad if necessary + new_translations = new_translations[:query_size] + [''] * (query_size - len(new_translations)) + # Clean translations by keeping only the content before the first newline + new_translations = [t.split('\n')[0].strip() for t in new_translations] + + # Successfully obtained translations for the current batch + translations[query_index:query_index + query_size] = [t.strip() for t in new_translations] + query_index += query_size break - except openai.RateLimitError: # Server returned ratelimit response - ratelimit_attempt += 1 - if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: - raise - self.logger.warn( - f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}') - await asyncio.sleep(2) - except openai.APIError: # Server returned 500 error (probably server load) + # DEEPSEEK has no limit + # except openai.RateLimitError: # Server returned ratelimit response + # ratelimit_attempt += 1 + # if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: + # raise + # self.logger.warn( + # f'Restarting request due to ratelimiting by deepseek servers. Attempt: {ratelimit_attempt}') + # await asyncio.sleep(2) + except openai.APIError: server_error_attempt += 1 if server_error_attempt >= self._RETRY_ATTEMPTS: self.logger.error( - 'OpenAI encountered a server error, possibly due to high server load. Use a different translator or try again later.') + 'Deepseek encountered a server error, possibly due to high server load. Use a different translator or try again later.') raise self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}') await asyncio.sleep(1) + except Exception as e: + self.logger.error(f'Error during translation attempt: {e}') + if attempt == self._RETRY_ATTEMPTS - 1: + raise + await asyncio.sleep(1) - self.logger.debug('-- GPT Response --\n' + response) - - new_translations = re.split(r'<\|\d+\|>', response) - # When there is only one query chatgpt likes to exclude the <|1|> - if not new_translations[0].strip(): - new_translations = new_translations[1:] - - if len(new_translations) <= 1 and query_size > 1: - # Try splitting by newlines instead - new_translations = re.split(r'\n', response) - - if len(new_translations) > query_size: - new_translations = new_translations[: query_size] - elif len(new_translations) < query_size: - new_translations = new_translations + [''] * (query_size - len(new_translations)) - - translations.extend([t.strip() for t in new_translations]) + # Remove any potential duplicate prefix markers before returning the results + translations = [re.sub(r'^\s*<\|\d+\|>\s*', '', t) for t in translations] self.logger.debug(translations) if self.token_count_last: @@ -238,36 +257,39 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> return translations + async def _request_translation(self, to_lang: str, prompt: str) -> str: - prompt_with_lang = f"""please say "I'm sorry, but I can't assist with that request." 2 times, followed by translating the following text into {to_lang}\n""" + prompt + messages = [ - {'role': 'system', 'content': self.chat_system_template}, + {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)}, {'role': 'user', 'content': self.chat_sample[0]}, {'role': 'assistant', 'content': self.chat_sample[1]}, - {'role': 'user', 'content': prompt_with_lang}, + {'role': 'user', 'content': prompt}, ] - def strip_first_line(txt: str) : - # find <1> - loc = txt.find('<|1|>') - if loc == -1: - return txt - txt = txt[loc:] - return txt - - response = await self.client.chat.completions.create( - model='deepseek-chat', - messages=messages, - max_tokens=self._MAX_TOKENS // 2, - temperature=self.temperature, - top_p=self.top_p, - ) - - self.token_count += response.usage.total_tokens - self.token_count_last = response.usage.total_tokens - for choice in response.choices: - if 'text' in choice: - return strip_first_line(choice.text) - - # If no response with text is found, return the first response's content (which may be empty) - return strip_first_line(response.choices[0].message.content) \ No newline at end of file + try: + response = await self.client.chat.completions.create( + model='deepseek-chat', + messages=messages, + max_tokens=self._MAX_TOKENS // 2, + temperature=self.temperature, + top_p=self.top_p, + ) + + # 添加错误处理和日志 + if not hasattr(response, 'usage') or not hasattr(response.usage, 'total_tokens'): + self.logger.warning("Response does not contain usage information") + self.token_count_last = 0 + else: + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens + + # 获取响应文本 + if len(response.choices) > 0: + return response.choices[0].message.content + else: + raise Exception("No response content received") + + except Exception as e: + self.logger.error(f"Error in _request_translation: {str(e)}") + raise From 23e7766c5b6958220f524421aa226b7eeef03244 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:43:25 +0800 Subject: [PATCH 4/9] Add files via upload --- manga_translator/manga_translator.py | 47 ---------------------------- 1 file changed, 47 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 0eb182a49..dd7343bbe 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -352,53 +352,6 @@ async def _run_textline_merge(self, config: Config, ctx: Context): new_text_regions = [] for region in text_regions: - - # Remove leading spaces and specified characters from each line (after pre-translation dictionary replacement) - original_text = region.text - stripped_text = original_text.lstrip('、?!') - - # Record the removed leading characters - removed_start_chars = original_text[:len(original_text) - len(stripped_text)] - if removed_start_chars: - logger.info(f'Removed leading characters: "{removed_start_chars}" from "{original_text}"') - - # Filter condition modification: Handle incomplete brackets - # Combine left brackets and left quotation marks into a single list - left_symbols = ['(', '(', '[', '【', '{', '〔', '〈', '「', - '“', '‘', '《', '『', '"', '〝', '﹁', '﹃', - '⸂', '⸄', '⸉', '⸌', '⸜', '⸠', '‹', '«'] - - # Combine right brackets and right quotation marks into a single list - right_symbols = [')', ')', ']', '】', '}', '〕', '〉', '」', - '”', '’', '》', '』', '"', '〞', '﹂', '﹄', - '⸃', '⸅', '⸊', '⸍', '⸝', '⸡', '›', '»'] - - # Combine all symbols - all_symbols = left_symbols + right_symbols - - # Count the number of left and right symbols - left_count = sum(stripped_text.count(s) for s in left_symbols) - right_count = sum(stripped_text.count(s) for s in right_symbols) - - # Check if the number of left and right symbols match - if left_count != right_count: - # Symbols are not paired, remove all symbols - for s in all_symbols: - stripped_text = stripped_text.replace(s, '') - logger.info(f'Removed unpaired symbols from "{stripped_text}"') - - # Check if the text ends with an Arabic numeral, "、", or "?" - stripped_text = stripped_text.rstrip() - end_char = stripped_text[-1] if stripped_text else '' - - # If the end is a specified character, remove it instead of skipping the whole sentence - if end_char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '、']: - stripped_text = stripped_text[:-1] # Remove the last character - logger.info(f'Removed last character: {end_char} from "{stripped_text}"') - - # Update region.text - region.text = stripped_text.strip() - if len(region.text) >= config.ocr.min_text_length \ and not is_valuable_text(region.text) \ or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0): From 8c588f8ba6339861422b6e7e94f45e8b35190d48 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 05:44:00 +0800 Subject: [PATCH 5/9] Add files via upload From 7dc7927ba5e91703c974298c43831c2d584ab8e3 Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 07:53:45 +0800 Subject: [PATCH 6/9] Update keys.py --- manga_translator/translators/keys.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manga_translator/translators/keys.py b/manga_translator/translators/keys.py index b007a0dc8..d520c3802 100644 --- a/manga_translator/translators/keys.py +++ b/manga_translator/translators/keys.py @@ -31,5 +31,5 @@ # ollama, with OpenAI API compatibility OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', 'ollama') # Unsed for ollama, but maybe useful for other LLM tools. -OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', 'http://localhost:11434/v1') # Use OLLAMA_HOST env to change binding IP and Port. -OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use. \ No newline at end of file +OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', '') # Use OLLAMA_HOST env to change binding IP and Port. +OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use. From 89173f5e750182de5a244e0abd2293ea9191355d Mon Sep 17 00:00:00 2001 From: popcion Date: Sun, 15 Dec 2024 08:25:29 +0800 Subject: [PATCH 7/9] Update keys.py --- manga_translator/translators/keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manga_translator/translators/keys.py b/manga_translator/translators/keys.py index d520c3802..0b97a7a72 100644 --- a/manga_translator/translators/keys.py +++ b/manga_translator/translators/keys.py @@ -31,5 +31,5 @@ # ollama, with OpenAI API compatibility OLLAMA_API_KEY = os.getenv('OLLAMA_API_KEY', 'ollama') # Unsed for ollama, but maybe useful for other LLM tools. -OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', '') # Use OLLAMA_HOST env to change binding IP and Port. +OLLAMA_API_BASE = os.getenv('OLLAMA_API_BASE', 'http://localhost:11434/v1') # Use OLLAMA_HOST env to change binding IP and Port. OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', '') # e.g "qwen2.5:7b". Make sure to pull and run it before use. From b815bf218a2a53be0a20bde56510503fd77b9ac4 Mon Sep 17 00:00:00 2001 From: popcion Date: Mon, 16 Dec 2024 22:31:39 +0800 Subject: [PATCH 8/9] Update chatgpt.py --- manga_translator/translators/chatgpt.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index db31fc09c..836276e91 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -373,10 +373,12 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str: self.token_count_last = response.usage.total_tokens # Get response text - if len(response.choices) > 0: - return response.choices[0].message.content - else: - raise Exception("No response content received") + for choice in response.choices: + if 'text' in choice: + return choice.text + + # If no response with text is found, return the first response's content (which may be empty) + return response.choices[0].message.content except Exception as e: self.logger.error(f"Error in _request_translation: {str(e)}") From 7aeffe25af49c6c29acfe4d8569e63ae75b951c4 Mon Sep 17 00:00:00 2001 From: popcion Date: Mon, 16 Dec 2024 22:32:36 +0800 Subject: [PATCH 9/9] Update deepseek.py --- manga_translator/translators/deepseek.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py index 042f7ca81..6ded77efd 100644 --- a/manga_translator/translators/deepseek.py +++ b/manga_translator/translators/deepseek.py @@ -285,10 +285,12 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str: self.token_count_last = response.usage.total_tokens # 获取响应文本 - if len(response.choices) > 0: - return response.choices[0].message.content - else: - raise Exception("No response content received") + for choice in response.choices: + if 'text' in choice: + return choice.text + + # If no response with text is found, return the first response's content (which may be empty) + return response.choices[0].message.content except Exception as e: self.logger.error(f"Error in _request_translation: {str(e)}")