feat: Implement structured output by using outlines #745

Status: Open

Wants to merge 85 commits into base: master.

Commits (85):
5e2be38
feat: Add SchemaModel backend for unified schema-based chat completion
Appointat Jul 2, 2024
cfa04c2
chore: Remove unused imports and update model configuration check
Appointat Jul 2, 2024
4fcd4dd
feat: Update SchemaModel to use the latest message for chat completion
Appointat Jul 2, 2024
c737a00
chore: Update SchemaModel to use GPT_3_5_TURBO as the default model t…
Appointat Jul 3, 2024
82ee7f9
feat: Update SchemaModel to include model configuration parameters
Appointat Jul 3, 2024
6958962
feat: Add SchemaModel backend for unified schema-based chat completion
Appointat Jul 3, 2024
2fb2a72
feat: Update pyproject.toml to include outlines dependency
Appointat Jul 3, 2024
b4b250f
temporary update code
raywhoelse Jul 6, 2024
0a904de
Create 2.py
raywhoelse Jul 11, 2024
9030095
structured response
raywhoelse Jul 13, 2024
bce2207
fix issues about code format
raywhoelse Jul 13, 2024
6be570c
add huddle structed response with tools
raywhoelse Jul 14, 2024
fd69855
refactor: Update GroqModel to support multiple Groq models and improv…
Appointat Jul 16, 2024
e04fa38
fix
Appointat Jul 16, 2024
1336673
fix
Appointat Jul 16, 2024
181dd36
update
Appointat Jul 16, 2024
07ac0fb
update
Appointat Jul 16, 2024
813c9bc
update
Appointat Jul 16, 2024
c866a74
update
Appointat Jul 16, 2024
0fde9c4
update
Appointat Jul 16, 2024
a276a05
update
Appointat Jul 16, 2024
50fda64
update
Appointat Jul 16, 2024
7876921
update
Appointat Jul 16, 2024
8d373e6
update
Appointat Jul 16, 2024
89aa3de
update
Appointat Jul 16, 2024
960cb39
Merge branch 'master' into output_parse
raywhoelse Jul 17, 2024
755aead
update conflict with new version
raywhoelse Jul 17, 2024
1b357bd
format code
raywhoelse Jul 17, 2024
84e4afc
format code
raywhoelse Jul 17, 2024
5e1bd6e
remove duplicate func
raywhoelse Jul 17, 2024
d3d173c
add structure unit test
raywhoelse Jul 18, 2024
1b930f9
remove return_json_format_response in judge
raywhoelse Jul 18, 2024
4434643
add comments of structure func
raywhoelse Jul 19, 2024
2aa34c5
chore: Add support for Outlines models in ModelFactory
Appointat Jul 19, 2024
34775a6
Merge branch 'output_parse' of https://github.com/camel-ai/camel into…
Appointat Jul 19, 2024
9d2205b
chore: Remove unused code and refactor structure of 2.py
Appointat Jul 19, 2024
c50e1f5
docs: Update link to Outlines transformers documentation
Appointat Jul 19, 2024
2d92e5f
chore: Refactor model initialization in SchemaModel
Appointat Jul 19, 2024
375f94a
chore: Refactor model initialization in SchemaModel
Appointat Jul 19, 2024
45d19be
chore: Refactor model initialization in SchemaModel
Appointat Jul 19, 2024
685bc5f
chore: Refactor model initialization in SchemaModel
Appointat Jul 19, 2024
717ce6a
fix issues about pre commit error
raywhoelse Jul 19, 2024
b8f7f83
Merge branch 'master' into output_parse
raywhoelse Jul 19, 2024
3085b09
fix issues with pre-commits error
raywhoelse Jul 19, 2024
558e7a5
Merge branch 'output_parse' of https://github.com/camel-ai/camel into…
raywhoelse Jul 19, 2024
497c904
fix issues
raywhoelse Jul 19, 2024
bcea621
fix issues about pre-commit
raywhoelse Jul 19, 2024
8aba978
chore: Rename step method to un in EmbodiedAgent
Appointat Jul 20, 2024
e02293b
small enhancement
Wendong-Fan Jul 22, 2024
a3080a7
fix
Wendong-Fan Jul 22, 2024
4a86a7c
update poetry
raywhoelse Jul 25, 2024
115c2b4
test
raywhoelse Jul 25, 2024
06c68fd
111
raywhoelse Jul 25, 2024
5127266
test
raywhoelse Jul 25, 2024
e16be7f
test
raywhoelse Jul 25, 2024
6cc2e68
fix the comments
Appointat Aug 2, 2024
fb42df5
merge master branch
Appointat Aug 2, 2024
d3aacb0
update
Appointat Aug 2, 2024
e452eb4
merge
Appointat Aug 2, 2024
78cca79
update
Appointat Aug 2, 2024
098fd4a
feat: Import outlines module with type hinting
Appointat Aug 3, 2024
3e309cb
chore: add hugging_face_hub_token
Appointat Aug 3, 2024
e2bdd59
feat: verify the structured format
Appointat Aug 3, 2024
477a2da
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
bc6cffa
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
0c9d01b
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
8601f6e
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
83193af
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
b1fa76a
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
a266d80
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
874dcf1
Refactor model_type assignment to use variable instead of hard-coded …
Appointat Aug 4, 2024
1d39cfc
Refactor model_name assignment to use updated model name
Appointat Aug 4, 2024
fed085e
Merge branch 'master' into output_parse
Appointat Aug 4, 2024
fdba081
Refactor constructor for open-source backend
Appointat Aug 16, 2024
91831b4
chore: Update version to 0.1.6.5 in code and documentation files
Appointat Aug 16, 2024
3bcbd87
fix: Remove unnecessary code in SchemaModel constructor
Appointat Aug 16, 2024
a93c640
chore: Update optional dependencies in pyproject.toml
Appointat Aug 17, 2024
cc0cb3c
feat: Update optional dependencies in pyproject.toml
Appointat Aug 17, 2024
2a31d63
Update role name to 'User' in user message
Appointat Aug 18, 2024
26bc1df
fix: fix the type check
Appointat Aug 18, 2024
116f8e0
feat: Update SchemaModel to use JSON response for output schema valid…
Appointat Aug 18, 2024
9a5d484
fix: Update output_schema parameter type hint to use Type[BaseModel]
Appointat Aug 18, 2024
ea4feb4
fix:Update output_schema parameter type hint to use Type[BaseModel
Appointat Aug 18, 2024
8bf1356
fixe: Update _client type hint in SchemaModel constructor
Appointat Aug 18, 2024
f798e59
Merge branch 'master' into output_parse
Appointat Aug 23, 2024
27 changes: 14 additions & 13 deletions camel/agents/chat_agent.py
@@ -23,6 +23,7 @@
List,
Optional,
Tuple,
Type,
Union,
)

@@ -319,7 +320,7 @@ def record_message(self, message: BaseMessage) -> None:
def step(
self,
input_message: BaseMessage,
output_schema: Optional[BaseModel] = None,
output_schema: Optional[Type[BaseModel]] = None,
) -> ChatAgentResponse:
r"""Performs a single step in the chat session by generating a response
to the input message.
@@ -330,10 +331,10 @@ def step(
either `user` or `assistant` but it will be set to `user`
anyway since for the self agent any incoming message is
external.
output_schema (Optional[BaseModel]): An optional pydantic model
that includes value types and field descriptions used to
generate a structured response by LLM. This schema helps
in defining the expected output format.
output_schema (Optional[Type[BaseModel]]): An optional pydantic
model that includes value types and field descriptions used to
generate a structured response by LLM. This schema helps in
defining the expected output format.

Returns:
ChatAgentResponse: A struct containing the output messages,
@@ -450,7 +451,7 @@ def step(
async def step_async(
self,
input_message: BaseMessage,
output_schema: Optional[BaseModel] = None,
output_schema: Optional[Type[BaseModel]] = None,
) -> ChatAgentResponse:
r"""Performs a single step in the chat session by generating a response
to the input message. This agent step can call async function calls.
@@ -461,10 +462,10 @@ async def step_async(
either `user` or `assistant` but it will be set to `user`
anyway since for the self agent any incoming message is
external.
output_schema (Optional[BaseModel]): An optional pydantic model
that includes value types and field descriptions used to
generate a structured response by LLM. This schema helps
in defining the expected output format.
output_schema (Optional[Type[BaseModel]]): An optional pydantic
model that includes value types and field descriptions used to
generate a structured response by LLM. This schema helps in
defining the expected output format.

Returns:
ChatAgentResponse: A struct containing the output messages,
@@ -614,13 +615,13 @@ def _add_tools_for_func_call(
# result message
return tool_calls, func_assistant_msg, func_result_msg

def _add_output_schema_to_tool_list(self, output_schema: BaseModel):
def _add_output_schema_to_tool_list(self, output_schema: Type[BaseModel]):
r"""Handles the structured output response for OpenAI.
This method processes the given output schema and integrates the
resulting function into the tools for the OpenAI model configuration.
Args:
output_schema (BaseModel): The schema representing the expected
output structure.
output_schema (Type[BaseModel]): The schema representing the
expected output structure.
"""
from camel.toolkits import OpenAIFunction

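The `Optional[BaseModel]` → `Optional[Type[BaseModel]]` change above can be illustrated with a minimal sketch (the `JokeSchema` name is hypothetical, not part of this PR): callers pass the schema *class* itself, not an instance of it.

```python
from typing import Optional, Type

from pydantic import BaseModel


# Hypothetical example schema -- not part of the PR.
class JokeSchema(BaseModel):
    joke: str
    funny_level: int


def step(output_schema: Optional[Type[BaseModel]] = None) -> str:
    # Type[BaseModel] accepts the class object itself, so the callee can
    # validate or instantiate with it later; a plain BaseModel hint would
    # instead suggest an already-constructed instance.
    if output_schema is None:
        return "no schema"
    return output_schema.__name__


print(step(JokeSchema))  # the class is passed -- no parentheses
```

This is why the docstrings were updated in the same commit: the schema "includes value types and field descriptions" at the class level, which is all the backend needs to constrain generation.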
3 changes: 3 additions & 0 deletions camel/configs/openai_config.py
@@ -125,6 +125,8 @@ class OpenSourceConfig(BaseConfig):
which will be used as the API base of OpenAI API.
api_params (ChatGPTConfig): An instance of :obj:ChatGPTConfig to
contain the arguments to be passed to OpenAI API.
model_kwargs (dict, optional): Additional keyword arguments to pass
to the model constructor. (default: :obj:`{}`)
"""

# Maybe the param needs to be renamed.
@@ -133,3 +135,4 @@ class OpenSourceConfig(BaseConfig):
model_path: str
server_url: str
api_params: ChatGPTConfig = Field(default_factory=ChatGPTConfig)
model_kwargs: Optional[dict] = Field(default_factory=dict)
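The `model_kwargs` field added above is consumed defensively by the new backend. A minimal sketch of a hypothetical config dict (the key names follow the `model_config_dict.get(...)` calls in `SchemaModel`; the values are illustrative only):

```python
# Hypothetical config for the OUTLINES_TRANSFORMERS branch; key names
# mirror the `model_config_dict.get(...)` lookups in SchemaModel.
model_config_dict = {
    "device": "cuda",
    "model_kwargs": {"load_in_8bit": True},       # forwarded to the model ctor
    "tokenizer_kwargs": {"padding_side": "left"},  # forwarded to the tokenizer
}

# SchemaModel reads each key with a fallback default, so partial
# configs are accepted without raising KeyError:
device = model_config_dict.get("device", None)
model_kwargs = model_config_dict.get("model_kwargs", {})
tokenizer_kwargs = model_config_dict.get("tokenizer_kwargs", {})
```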
2 changes: 2 additions & 0 deletions camel/models/__init__.py
@@ -25,6 +25,7 @@
from .openai_audio_models import OpenAIAudioModels
from .openai_compatibility_model import OpenAICompatibilityModel
from .openai_model import OpenAIModel
from .schema_model import SchemaModel
from .stub_model import StubModel
from .vllm_model import VLLMModel
from .zhipuai_model import ZhipuAIModel
@@ -46,5 +47,6 @@
'OllamaModel',
'VLLMModel',
'GeminiModel',
'SchemaModel',
'OpenAICompatibilityModel',
]
6 changes: 6 additions & 0 deletions camel/models/model_factory.py
@@ -24,6 +24,7 @@
from camel.models.open_source_model import OpenSourceModel
from camel.models.openai_compatibility_model import OpenAICompatibilityModel
from camel.models.openai_model import OpenAIModel
from camel.models.schema_model import SchemaModel
from camel.models.stub_model import StubModel
from camel.models.vllm_model import VLLMModel
from camel.models.zhipuai_model import ZhipuAIModel
@@ -108,6 +109,11 @@ def create(
model_class = VLLMModel
elif model_platform.is_litellm:
model_class = LiteLLMModel
elif model_platform.is_outlines:
model_class = SchemaModel
return model_class(
model_platform, model_type, model_config_dict, url
)
elif model_platform.is_openai_compatibility_model:
model_class = OpenAICompatibilityModel
else:
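The factory dispatch above can be sketched in miniature with stand-in classes (these are not the real camel API): the Outlines platforms route to `SchemaModel`, and the branch returns immediately rather than falling through to the shared constructor call used by the other platforms.

```python
from enum import Enum


class Platform(Enum):
    OPENAI = "openai"
    OUTLINES_VLLM = "outlines-vllm"

    @property
    def is_outlines(self) -> bool:
        # Membership test, mirroring ModelPlatformType.is_outlines.
        return self in {Platform.OUTLINES_VLLM}


class SchemaModel:
    # Stand-in: the real SchemaModel also takes model_config_dict and url.
    def __init__(self, platform: Platform, model_type: str) -> None:
        self.platform = platform
        self.model_type = model_type


def create(platform: Platform, model_type: str) -> SchemaModel:
    # Mirrors the early return in the `is_outlines` branch: SchemaModel
    # is constructed on the spot instead of after the elif chain.
    if platform.is_outlines:
        return SchemaModel(platform, model_type)
    raise ValueError(f"Unsupported platform in this sketch: {platform}")


model = create(Platform.OUTLINES_VLLM, "mistralai/Mistral-7B-v0.3")
```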
232 changes: 232 additions & 0 deletions camel/models/schema_model.py
@@ -0,0 +1,232 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import json
from typing import (
Any,
Dict,
List,
Optional,
Type,
TypeVar,
Union,
overload,
)

from openai import Stream
from pydantic import BaseModel, ValidationError

from camel.messages import OpenAIMessage
from camel.models import BaseModelBackend
from camel.types import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessage,
Choice,
ModelPlatformType,
ModelType,
)
from camel.utils import (
BaseTokenCounter,
OpenAITokenCounter,
)

T = TypeVar('T', bound=BaseModel)


class SchemaModel(BaseModelBackend):
r"""Schema model in a unified BaseModelBackend interface, which aims to
generate the formatted response."""

def __init__(
self,
model_platform: ModelPlatformType,
model_type: str,
model_config_dict: Dict[str, Any],
url: Optional[str] = None,
) -> None:
r"""Constructor for open-source backend.

Args:
model_platform (ModelPlatformType): Platform from which the model
originates, including transformers, llama_cpp, and vllm.
model_type (str): Model for which a backend is created, for
example, "mistralai/Mistral-7B-v0.3".
model_config_dict (Dict[str, Any]): A dictionary that will
be fed into openai.ChatCompletion.create().
url (Optional[str]): The url to the OpenAI service.
"""
from outlines import models # type: ignore[import]
self.model_platform = model_platform
self.model_name = model_type
self.model_config_dict = model_config_dict
self._client: Union[models.Transformers, models.LlamaCpp, models.VLLM]
self._url = url

# Since Outlines supports multiple model types, it is necessary to
# read the documentation to learn about the model kwargs:
# https://outlines-dev.github.io/outlines/reference/models/transformers
if self.model_platform == ModelPlatformType.OUTLINES_TRANSFORMERS:
model_kwargs = self.model_config_dict.get("model_kwargs", {})
device = self.model_config_dict.get("device", None)
tokenizer_kwargs = self.model_config_dict.get(
"tokenizer_kwargs", {}
)

self._client = models.transformers(
model_name=self.model_name,
device=device,
model_kwargs=model_kwargs,
tokenizer_kwargs=tokenizer_kwargs,
)
elif self.model_platform == ModelPlatformType.OUTLINES_LLAMACPP:
repo_id = self.model_config_dict.get(
"repo_id", "TheBloke/phi-2-GGUF"
)
filename = self.model_config_dict.get(
"filename", "phi-2.Q4_K_M.gguf"
)
download_dir = self.model_config_dict.get("download_dir", None)
model_kwargs = self.model_config_dict.get("model_kwargs", {})

from llama_cpp import llama_tokenizer # type: ignore[import]

# Initialize the tokenizer
tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained(
repo_id
) # type: ignore[attr-defined]

self._client = models.llamacpp( # type: ignore[attr-defined]
repo_id=repo_id,
filename=filename,
download_dir=download_dir,
tokenizer=tokenizer,
**model_kwargs,
)
elif self.model_platform == ModelPlatformType.OUTLINES_VLLM:
model_kwargs = self.model_config_dict.get("model_kwargs", {})

self._client = models.vllm(
model_name=self.model_name,
**model_kwargs,
)
else:
raise ValueError(
f"Unsupported platform for Outlines: {self.model_platform}"
)

self._token_counter: Optional[BaseTokenCounter] = None

@property
def token_counter(self) -> BaseTokenCounter:
r"""Initialize the token counter for the model backend.

Returns:
BaseTokenCounter: The token counter following the model's
tokenization style.
"""
if not self._token_counter:
# The default model type is GPT_3_5_TURBO, since the self-hosted
# models are not supported in the token counter.
self._token_counter = OpenAITokenCounter(ModelType.GPT_3_5_TURBO)
return self._token_counter

@overload
def run(
self,
messages: List[OpenAIMessage],
) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]: ...

@overload
def run(
self,
messages: List[OpenAIMessage],
output_schema: Type[T],
) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]: ...

def run(
self,
messages: List[OpenAIMessage],
output_schema: Optional[Type[T]] = None,
# Reviewer note: the parameter is typed as `T` rather than `BaseModel`
# because the output-parse schema is a `BaseModel` subclass (`T` is
# bound to `BaseModel` above).
) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
if output_schema is None:
raise NotImplementedError(
"run without output_schema is not implemented"
)

from outlines import generate # type: ignore[import]

generator = generate.json(self._client, output_schema)

if not messages:
raise ValueError("The messages list should not be empty.")
message = messages[-1]
message_str = (
f"{message.get('role', '')}: {message.get('content', '')}"
)

parsed_response = generator(message_str)
# Serialize the parsed pydantic instance back to a JSON string.
# (json.dumps(str(...)) would double-encode it and break the
# validation below.)
json_response = parsed_response.model_dump_json()

# Verify the structured format
try:
_ = output_schema(**json.loads(json_response))
except ValidationError as e:
raise ValueError(
f"Generated response does not match the output schema: {e}"
)

import time

response = ChatCompletion(
id=f"chatcmpl-{time.time()}",
created=int(time.time()),
model=self.model_name,
object="chat.completion",
choices=[
Choice(
index=0,
message=ChatCompletionMessage(
role="assistant",
content=json_response,
),
finish_reason="stop",
),
],
)

return response

def check_model_config(self):
r"""Check whether the model configuration contains the required
arguments for the schema-based model.

Raises:
Warning: If the model configuration dictionary does not contain
the required arguments for the schema-based model, the warnings
are raised.
"""
# Check the model_name; raise a Warning if it is not found
if "model_name" not in self.model_config_dict:
raise Warning("The model_name is set to the default value.")

@property
def stream(self) -> bool:
r"""Returns whether the model is in stream mode,
which sends partial results each time.

Returns:
bool: Whether the model is in stream mode.
"""
return self.model_config_dict.get('stream', False)
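The schema-verification round trip in `run` can be sketched in isolation (the `Person` schema is hypothetical; this assumes pydantic v2's `model_dump_json`):

```python
import json

from pydantic import BaseModel, ValidationError


# Hypothetical schema standing in for the caller's output_schema.
class Person(BaseModel):
    name: str
    age: int


# Stand-in for what `generate.json(self._client, output_schema)` yields:
# a validated pydantic instance of the requested schema.
parsed_response = Person(name="Ada", age=36)

# Serialize, then re-validate the JSON against the schema -- mirroring
# the try/except ValidationError block in SchemaModel.run.
json_response = parsed_response.model_dump_json()
try:
    Person(**json.loads(json_response))
except ValidationError as e:
    raise ValueError(
        f"Generated response does not match the output schema: {e}"
    )
```

The re-validation is cheap insurance: Outlines constrains generation to the schema's grammar, but the round trip also guarantees the string placed into `ChatCompletionMessage.content` parses back into the caller's model.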
12 changes: 12 additions & 0 deletions camel/types/enums.py
@@ -442,6 +442,9 @@ class ModelPlatformType(Enum):
ZHIPU = "zhipuai"
DEFAULT = "default"
GEMINI = "gemini"
OUTLINES_TRANSFORMERS = "outlines-transformers"
OUTLINES_LLAMACPP = "outlines-llamacpp"
OUTLINES_VLLM = "outlines-vllm"
VLLM = "vllm"
MISTRAL = "mistral"
OPENAI_COMPATIBILITY_MODEL = "openai-compatibility-model"
@@ -507,6 +510,15 @@ def is_gemini(self) -> bool:
r"""Returns whether this platform is Gemini."""
return self is ModelPlatformType.GEMINI

@property
def is_outlines(self) -> bool:
r"""Returns whether this platform is Outlines."""
return self in {
ModelPlatformType.OUTLINES_TRANSFORMERS,
ModelPlatformType.OUTLINES_LLAMACPP,
ModelPlatformType.OUTLINES_VLLM,
}


class AudioModelType(Enum):
TTS_1 = "tts-1"