Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i18n: Support Chinese language #102

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
openai>=1.23.6
tenacity>=8.2.3
python-dotenv>=1.0.0
pandas>=1.5.3
platformdirs>=3.11.0
datasets>=2.14.6
diskcache>=5.6.3
graphviz>=0.20.3
gdown>=5.2.0
litellm>=1.49.5
pillow
Expand Down
40 changes: 36 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,40 @@
from setuptools import setup, find_packages
import os
import subprocess

with open("requirements.txt") as f:
from setuptools import find_packages, setup
from setuptools.command.install import install


with open("requirements.txt", encoding="utf-8") as f:
requirements = f.read().splitlines()

with open("README.md") as readme_file:
with open("README.md", encoding="utf-8") as readme_file:
readme = readme_file.read()


class CustomInstallCommand(install):
    """Customized setuptools install command - compiles .po files to .mo.

    Requires the GNU gettext ``msgfmt`` tool. If the tool is missing or a
    catalog fails to compile, a warning is printed and installation
    continues without that translation rather than aborting.
    """

    def run(self):
        # Compile .po files to .mo files before proceeding with installation
        self.compile_translations()
        # Run the standard install process
        install.run(self)

    def compile_translations(self):
        """Compile each locales/<lang>/LC_MESSAGES/textgrad.po to a .mo file."""
        locales_dir = os.path.join('textgrad', 'locales')
        # Nothing to compile when the source tree ships no locales directory.
        if not os.path.isdir(locales_dir):
            return
        for lang in os.listdir(locales_dir):
            po_file = os.path.join(locales_dir, lang, 'LC_MESSAGES', 'textgrad.po')
            mo_file = os.path.join(locales_dir, lang, 'LC_MESSAGES', 'textgrad.mo')
            if os.path.exists(po_file):
                print(f'Compiling {po_file} to {mo_file}')
                try:
                    subprocess.run(['msgfmt', po_file, '-o', mo_file], check=True)
                except (subprocess.CalledProcessError, FileNotFoundError) as e:
                    # FileNotFoundError: msgfmt (GNU gettext) is not installed.
                    # Either way, a missing translation must not break install.
                    print(f"Warning: Failed to compile {po_file}. Installation will continue.\nError: {e}")
            else:
                print(f'No .po file found for language {lang}')

setup(
name="textgrad",
version="0.1.6",
Expand All @@ -25,11 +54,14 @@
author="Zou Group",
author_email="[email protected]",
packages=find_packages(include=["textgrad", "textgrad.*"]),
cmdclass={
'install': CustomInstallCommand,
},
include_package_data=True,
package_data={"textgrad": ["locales/*/LC_MESSAGES/*.mo"]},
install_requires=requirements,
extras_require={
"vllm": ["vllm"],
},
zip_safe=False,
)

9 changes: 9 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import builtins


def return_as_is(text):
    """Identity function used as a stand-in for gettext's ``_``.

    Project modules call ``_(...)`` at import time; installing this no-op
    translation as a builtin keeps tests from failing with an undefined
    ``_`` when no real translation has been installed.
    """
    return text


# Make ``_`` visible to every module without an explicit import.
builtins._ = return_as_is
11 changes: 10 additions & 1 deletion textgrad/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
import logging
import json
from datetime import datetime

from .i18n import _set_language

# Set the language of strings fed to LLMs according to the following environment variables:
# LANGUAGE, LC_ALL, LC_MESSAGES, LANG.
# If none of them is found, it falls back to the original string literals as is.
_set_language()


class CustomJsonFormatter(logging.Formatter):
def format(self, record: logging.LogRecord) -> str:
super(CustomJsonFormatter, self).format(record)
Expand All @@ -25,4 +34,4 @@ def format(self, record: logging.LogRecord) -> str:
from .config import set_backward_engine, SingletonBackwardEngine
from .autograd import sum, aggregate

singleton_backward_engine = SingletonBackwardEngine()
singleton_backward_engine = SingletonBackwardEngine()
27 changes: 17 additions & 10 deletions textgrad/autograd/algebra.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
## Operations over variables.
from typing import List, Set

from textgrad import logger
from textgrad.variable import Variable
from textgrad.engine import EngineLM
from .reduce_prompts import construct_reduce_prompt, REDUCE_MEAN_SYSTEM_PROMPT
from .function import Function, BackwardContext
from textgrad.variable import Variable

from .function import BackwardContext, Function
from .reduce_prompts import REDUCE_MEAN_SYSTEM_PROMPT, construct_reduce_prompt


def _reduce_gradients_mean(gradients: Set[Variable], backward_engine: EngineLM) -> Variable:
Expand Down Expand Up @@ -84,21 +86,23 @@ def backward(self, summation: Variable, backward_engine: EngineLM):
if summation_gradients == "":
variable_gradient_value = ""
else:
variable_gradient_value = f"Here is the combined feedback we got for this specific {variable.get_role_description()} and other variables: {summation_gradients}."
var_grad_template = _("Here is the combined feedback we got for this specific {variable_desc} and other variables: {feedback}")
variable_gradient_value = var_grad_template.format(variable_desc=variable.get_role_description(), feedback=summation_gradients)

logger.info(f"Idempotent backward", extra={"v_gradient_value": variable_gradient_value,
logger.info("Idempotent backward", extra={"v_gradient_value": variable_gradient_value,
"summation_role": summation.get_role_description()})

feedback_role_template = _("feedback to {variable_desc}")
var_gradients = Variable(value=variable_gradient_value,
role_description=f"feedback to {variable.get_role_description()}")
role_description=feedback_role_template.format(variable_desc=variable.get_role_description()))
variable.gradients.add(var_gradients)

if summation._reduce_meta != []:
var_gradients._reduce_meta.extend(summation._reduce_meta)
variable._reduce_meta.extend(summation._reduce_meta)

variable.gradients.add(Variable(value=variable_gradient_value,
role_description=f"feedback to {variable.get_role_description()}"))
role_description=feedback_role_template.format(variable_desc=variable.get_role_description())))


class Aggregate(Function):
Expand All @@ -124,8 +128,9 @@ def forward(self, variables: List[Variable]) -> Variable:
# We also need to communicate to the variables that they are part of a mean operation.
reduce_meta = {"op": _reduce_gradients_mean, "id": id(variables)}

agg_role_template = _("a combination of the following variables: {role_descriptions}.")
aggregated_variable = Variable(value=concat_values,
role_description=f"a combination of the following variables: {role_descriptions}.",
role_description=agg_role_template.format(role_descriptions=role_descriptions),
predecessors=variables,
requires_grad=any([v.requires_grad for v in variables]))

Expand All @@ -142,13 +147,15 @@ def backward(self, aggregated_variable: Variable, backward_engine: EngineLM):
if aggregate_gradients == "":
variable_gradient_value = ""
else:
variable_gradient_value = f"Here is the combined feedback we got for this specific {variable.get_role_description()} and other variables: {aggregate_gradients}."
var_grad_template = _("Here is the combined feedback we got for this specific {role_description} and other variables: {aggregate_gradients}")
variable_gradient_value = var_grad_template.format(role_description=variable.get_role_description(), aggregate_gradients=aggregate_gradients)

logger.info(f"aggregation backward", extra={"v_gradient_value": variable_gradient_value,
"aggregation_role": aggregated_variable.get_role_description()})

feedback_role_template = _("feedback to {role_description}")
var_gradients = Variable(value=variable_gradient_value,
role_description=f"feedback to {variable.get_role_description()}")
role_description=feedback_role_template.format(role_description=variable.get_role_description()))
variable.gradients.add(var_gradients)

if aggregated_variable._reduce_meta != []:
Expand Down
24 changes: 12 additions & 12 deletions textgrad/autograd/llm_backward_prompts.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
GLOSSARY_TEXT_BACKWARD = """
GLOSSARY_TEXT_BACKWARD = _("""
### Glossary of tags that will be sent to you:
# - <LM_SYSTEM_PROMPT>: The system prompt for the language model.
# - <LM_INPUT>: The input to the language model.
# - <LM_OUTPUT>: The output of the language model.
# - <OBJECTIVE_FUNCTION>: The objective of the optimization task.
# - <VARIABLE>: Specifies the span of the variable.
# - <ROLE>: The role description of the variable."""
# - <ROLE>: The role description of the variable.""")

### Backward engine prompts

# System prompt to the backward engine.
BACKWARD_SYSTEM_PROMPT = (
BACKWARD_SYSTEM_PROMPT = _(
"You are part of an optimization system that improves a given text (i.e. the variable). You are the gradient (feedback) engine. "
"Your only responsibility is to give intelligent and creative feedback and constructive criticism to variables, given an objective specified in <OBJECTIVE_FUNCTION> </OBJECTIVE_FUNCTION> tags. "
"The variables may be solutions to problems, prompts to language models, code, or any other text-based variable. "
Expand All @@ -21,38 +21,38 @@
f"{GLOSSARY_TEXT_BACKWARD}")

# First part of the prompt for the llm backward function
CONVERSATION_TEMPLATE = (
CONVERSATION_TEMPLATE = _(
"<LM_SYSTEM_PROMPT> {system_prompt} </LM_SYSTEM_PROMPT>\n\n"
"<LM_INPUT> {prompt} </LM_INPUT>\n\n"
"<LM_OUTPUT> {response_value} </LM_OUTPUT>\n\n"
)

# Has the gradient on the output.
CONVERSATION_START_INSTRUCTION_CHAIN = (
CONVERSATION_START_INSTRUCTION_CHAIN = _(
"You will give feedback to a variable with the following role: <ROLE> {variable_desc} </ROLE>. "
"Here is a conversation with a language model (LM):\n\n"
"{conversation}"
)
OBJECTIVE_INSTRUCTION_CHAIN = (
OBJECTIVE_INSTRUCTION_CHAIN = _(
"This conversation is part of a larger system. The <LM_OUTPUT> was later used as {response_desc}.\n\n"
"<OBJECTIVE_FUNCTION>Your goal is to give feedback to the variable to address the following feedback on the LM_OUTPUT: {response_gradient} </OBJECTIVE_FUNCTION>\n\n"
)

# Does not have gradient on the output
CONVERSATION_START_INSTRUCTION_BASE = (
CONVERSATION_START_INSTRUCTION_BASE = _(
"You will give feedback to a variable with the following role: <ROLE> {variable_desc} </ROLE>. "
"Here is an evaluation of the variable using a language model:\n\n"
"{conversation}"
)

OBJECTIVE_INSTRUCTION_BASE = (
OBJECTIVE_INSTRUCTION_BASE = _(
"<OBJECTIVE_FUNCTION>Your goal is to give feedback and criticism to the variable given the above evaluation output. "
"Our only goal is to improve the above metric, and nothing else. </OBJECTIVE_FUNCTION>\n\n"
)

# Third part of the prompt for the llm backward function.
# Asks the user to evaluate a variable in the conversation.
EVALUATE_VARIABLE_INSTRUCTION = (
EVALUATE_VARIABLE_INSTRUCTION = _(
"We are interested in giving feedback to the {variable_desc} "
"for this conversation. Specifically, give feedback to the following span "
"of text:\n\n<VARIABLE> "
Expand All @@ -61,19 +61,19 @@
"could be improved to improve the <OBJECTIVE_FUNCTION>. Be very creative, critical, and intelligent.\n\n"
)

SEARCH_QUERY_BACKWARD_INSTRUCTION = (
SEARCH_QUERY_BACKWARD_INSTRUCTION = _(
"Here is a query and a response from searching with {engine_name}:\n"
"<QUERY> {query} </QUERY>\n"
"<RESULTS> {results} </RESULTS>\n\n"
)


GRADIENT_OF_RESULTS_INSTRUCTION = (
GRADIENT_OF_RESULTS_INSTRUCTION = _(
"For the search results from {engine_name} we got the following feedback:\n\n"
"<FEEDBACK>{results_gradient}</FEEDBACK>\n\n"
)

IN_CONTEXT_EXAMPLE_PROMPT_ADDITION = (
IN_CONTEXT_EXAMPLE_PROMPT_ADDITION = _(
"You must base on the following examples when give feedback and criticism to the variable:\n\n"
"<EXAMPLES>{in_context_examples}</EXAMPLES>\n\n"
)
2 changes: 1 addition & 1 deletion textgrad/autograd/multimodal_backward_prompts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# First part of the prompt for the llm backward function
MULTIMODAL_CONVERSATION_TEMPLATE = (
MULTIMODAL_CONVERSATION_TEMPLATE = _(
"\n Above messages are the <LM_INPUT>\n\n"
"<LM_SYSTEM_PROMPT> {system_prompt} </LM_SYSTEM_PROMPT>\n\n"
"<LM_OUTPUT> {response_value} </LM_OUTPUT>\n\n"
Expand Down
2 changes: 1 addition & 1 deletion textgrad/autograd/reduce_prompts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
REDUCE_MEAN_SYSTEM_PROMPT = (
REDUCE_MEAN_SYSTEM_PROMPT = _(
"You are part of an optimization system that improves a given text (i.e. the variable). "
"Your only responsibility is to critically aggregate and summarize the feedback from sources. "
"The variables may be solutions to problems, prompts to language models, code, or any other text-based variable. "
Expand Down
25 changes: 13 additions & 12 deletions textgrad/autograd/string_based_ops.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,45 @@
from typing import Callable, Dict, List

from textgrad import logger
from textgrad.variable import Variable
from textgrad.engine import EngineLM
from .function import Function, BackwardContext
from typing import Callable, Dict, List
from textgrad.variable import Variable

from .function import BackwardContext, Function

CONVERSATION_TEMPLATE_STRING = (
CONVERSATION_TEMPLATE_STRING = _(
"Function purpose: {function_purpose}\n\n"
"<INPUTS_TO_FUNCTION> {inputs_string} </INPUTS_TO_FUNCTION>\n\n"
"<OUTPUT_OF_FUNCTION> {response_value} </OUTPUT_OF_FUNCTION>\n\n"
)

# Has the gradient on the output.
CONVERSATION_START_INSTRUCTION_STRING_FN_CHAIN = (
CONVERSATION_START_INSTRUCTION_STRING_FN_CHAIN = _(
"You will give feedback to a variable with the following role: <ROLE> {variable_desc} </ROLE>. "
"Here is an evaluation of a string-based function with inputs and outputs :\n\n"
"{conversation}"
)

# Does not have gradient on the output
CONVERSATION_START_INSTRUCTION_STRING_FN_BASE = (
CONVERSATION_START_INSTRUCTION_STRING_FN_BASE = _(
"You will give feedback to a variable with the following role: <ROLE> {variable_desc} </ROLE>. "
"Here is an evaluation of the variable using a string-based function:\n\n"
"{conversation}"
)

OBJECTIVE_INSTRUCTION_CHAIN = (
OBJECTIVE_INSTRUCTION_CHAIN = _(
"This conversation is part of a larger system. The <OUTPUT_OF_FUNCTION> was later used as {response_desc}.\n\n"
"<OBJECTIVE_FUNCTION>Your goal is to give feedback to the variable to address the following feedback on the OUTPUT_OF_FUNCTION: {response_gradient} </OBJECTIVE_FUNCTION>\n\n"
)

OBJECTIVE_INSTRUCTION_BASE = (
OBJECTIVE_INSTRUCTION_BASE = _(
"<OBJECTIVE_FUNCTION>Your goal is to give feedback and criticism to the variable given the above evaluation output. "
"Our only goal is to improve the above metric, and nothing else. </OBJECTIVE_FUNCTION>\n\n"
)

# Some instructions for the backward pass are shared with LLMs
from .llm_backward_prompts import (
EVALUATE_VARIABLE_INSTRUCTION,
BACKWARD_SYSTEM_PROMPT
)
from .llm_backward_prompts import (BACKWARD_SYSTEM_PROMPT,
EVALUATE_VARIABLE_INSTRUCTION)


class StringBasedFunction(Function):
def __init__(self, fn: Callable, function_purpose: str):
Expand Down
6 changes: 3 additions & 3 deletions textgrad/defaults.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SYSTEM_PROMPT_DEFAULT_ROLE = (
SYSTEM_PROMPT_DEFAULT_ROLE = _(
"system prompt to the language model that specifies the behavior and strategies, which will be reused across queries"
)
VARIABLE_INPUT_DEFAULT_ROLE = "query to the language model"
VARIABLE_OUTPUT_DEFAULT_ROLE = "response from the language model"
VARIABLE_INPUT_DEFAULT_ROLE = _("query to the language model")
VARIABLE_OUTPUT_DEFAULT_ROLE = _("response from the language model")
37 changes: 37 additions & 0 deletions textgrad/i18n.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
This module provides functions for language localization in the TextGrad package.
"""
import logging
import gettext
import os


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _set_language(lang=None):
"""
Set the language for localization.

Note:
- This function does not support changing the language dynamically after it has been set.
- The language is determined when the module is first imported by checking the following
environment variables: LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.

Args:
lang (str, optional): The language code to set. Defaults to None.
"""
locale_dir = os.path.join(os.path.dirname(__file__), 'locales')
gettext.bindtextdomain('textgrad', locale_dir)
gettext.textdomain('textgrad')
if lang is not None:
lang = [lang]
try:
translation = gettext.translation('textgrad', locale_dir, languages=lang)
except FileNotFoundError:
if lang and lang != 'en':
logger.warning("Language '%s' not found under %s. Using fallback options.",
lang, locale_dir)
translation = gettext.translation('textgrad', locale_dir, languages=lang, fallback=True)
translation.install()
Loading