Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement self-instruct and evolve-instruct synthetic data generation pipeline #720

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
24cff2c
add nemotron model
Wendong-Fan Jun 19, 2024
d5b307e
fix
Wendong-Fan Jun 19, 2024
737da4b
error handing
Wendong-Fan Jun 19, 2024
c3e1b9c
update pytest yml
Wendong-Fan Jun 19, 2024
e29d473
Implement self-instruct synthetic data generation pipeline
andrei3131 Jul 11, 2024
cd2c68d
adding evolve
Hither1 Jul 12, 2024
f2702de
debug evolve
Hither1 Jul 15, 2024
25edfc6
seed instruction files
Hither1 Jul 15, 2024
30169ca
add ipy notebook
Hither1 Jul 15, 2024
ea984a1
Enable configuring spec with list of seed instructions instead of pat…
andrei3131 Jul 16, 2024
7d5c58c
local
Jul 16, 2024
c99f061
local
Jul 16, 2024
6a62295
start jupyter notebook for self-instruct generation
andrei3131 Jul 16, 2024
7089b81
nemotron critic
Jul 17, 2024
e45b970
local
Jul 17, 2024
245377f
Merge branch 'synth_data_self_instruct' of https://github.com/camel-a…
Jul 17, 2024
4f3c7bb
Merge branch 'master' into role_play_nemotron_critic
andrei3131 Jul 17, 2024
137c45f
Merge branch 'role_play_nemotron_critic' into synth_data_self_instruct
andrei3131 Jul 17, 2024
5b0306d
fixed some issues with evolve instruct
Jul 17, 2024
a304d39
fixed some issues with evolve instruct
Jul 17, 2024
6794527
fixed some issues with evolve instruct
Jul 17, 2024
ecd4feb
fixed some issues with evolve instruct
Jul 17, 2024
78741e0
start to imple multi-agent system
Jul 18, 2024
5fa3358
Add Nvidia Nemtron for synthetic data evaluation
andrei3131 Jul 19, 2024
28f9750
Merge branch 'synth_data_self_instruct' of https://github.com/camel-a…
andrei3131 Jul 19, 2024
7fd3a5c
clean up
andrei3131 Jul 19, 2024
da057e2
deepseek math and coding
Jul 19, 2024
96f4cfa
move to subpackage of camel
Jul 19, 2024
8240a19
math and coder
Jul 21, 2024
9ea9ea3
math and coder
Jul 21, 2024
a36da80
add base instruct spec
Jul 21, 2024
734534e
fix coder
Jul 24, 2024
d98edff
fix coder
Jul 24, 2024
fe55e1a
fix according to reviews
Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion camel/agents/chat_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,11 +522,19 @@ def handle_batch_response(
"""
output_messages: List[BaseMessage] = []
for choice in response.choices:
if isinstance(choice.message, list):
# If choice.message is a list, handle accordingly
# It's a check to fit with Nemotron model integration.
content = "".join(
[msg.content for msg in choice.message if msg.content]
)
else:
content = choice.message.content or ""
chat_message = BaseMessage(
role_name=self.role_name,
role_type=self.role_type,
meta_dict=dict(),
content=choice.message.content or "",
content=content,
)
output_messages.append(chat_message)
finish_reasons = [
Expand Down
6 changes: 6 additions & 0 deletions camel/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from .litellm_model import LiteLLMModel
from .model_factory import ModelFactory
from .nemotron_model import NemotronModel
from .nvidia_model import NvidiaModel
from .nvidia_model_v2 import NvidiaModelV2
from .nvidia_model_v3 import NvidiaModelV3
from .ollama_model import OllamaModel
from .open_source_model import OpenSourceModel
from .openai_audio_models import OpenAIAudioModels
Expand All @@ -34,6 +37,9 @@
'ModelFactory',
'LiteLLMModel',
'OpenAIAudioModels',
'NvidiaModel',
'NvidiaModelV2',
'NvidiaModelV3',
'NemotronModel',
'OllamaModel',
'GeminiModel',
Expand Down
125 changes: 125 additions & 0 deletions camel/models/nvidia_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import logging
import os
from typing import Any, Dict, List, Optional, Union

from openai import OpenAI, Stream

from camel.configs import OPENAI_API_PARAMS
from camel.messages import OpenAIMessage
from camel.models import BaseModelBackend
from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
from camel.utils import (
    BaseTokenCounter,
    OpenAITokenCounter,
    model_api_key_required,
)


class NvidiaModel(BaseModelBackend):
    r"""Nvidia API in a unified BaseModelBackend interface."""

    # NOTE: Nemotron model doesn't support additional model config like
    # OpenAI, so `model_config_dict` is validated but not forwarded to the
    # completion call in `run`.

    def __init__(
        self,
        model_type: ModelType,
        model_config_dict: Dict[str, Any],
        api_key: Optional[str] = None,
    ) -> None:
        r"""Constructor for Nvidia backend.

        Args:
            model_type (ModelType): Model for which a backend is created.
            model_config_dict (Dict[str, Any]): A dictionary that will
                be fed into openai.ChatCompletion.create().
            api_key (Optional[str]): The API key for authenticating with the
                Nvidia service. Falls back to the ``NVIDIA_API_KEY``
                environment variable. (default: :obj:`None`)

        Raises:
            ValueError: If ``NVIDIA_API_BASE_URL`` is unset or no API key
                can be resolved from the argument or environment.
        """
        super().__init__(model_type, model_config_dict)
        url = os.environ.get('NVIDIA_API_BASE_URL', None)
        self._api_key = api_key or os.environ.get("NVIDIA_API_KEY")
        if not url or not self._api_key:
            raise ValueError("The NVIDIA API base url and key should be set.")
        self._client = OpenAI(
            timeout=60, max_retries=3, base_url=url, api_key=self._api_key
        )
        self._token_counter: Optional[BaseTokenCounter] = None

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            BaseTokenCounter: The token counter following the model's
                tokenization style.
        """
        if not self._token_counter:
            # NOTE: It's a temporary setting for token counter; the GPT-3.5
            # tokenizer only approximates the Nemotron tokenization.
            self._token_counter = OpenAITokenCounter(ModelType.GPT_3_5_TURBO)
        return self._token_counter

    @model_api_key_required
    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
        r"""Runs inference of OpenAI chat completion.

        Args:
            messages (List[OpenAIMessage]): Message list with the chat history
                in OpenAI API format.

        Returns:
            Union[ChatCompletion, Stream[ChatCompletionChunk]]:
                `ChatCompletion` in the non-stream mode.
        """
        # Debug-level logging instead of a bare print, so callers control
        # verbosity via the logging configuration (per PR review feedback).
        logging.getLogger(__name__).debug(
            "Nvidia request messages: %s", messages
        )
        # Nemotron model only accepts 'user' or 'assistant' as role; coerce
        # any other role (e.g. 'system') to 'assistant'.
        for message in messages:
            if message['role'] not in ['user', 'assistant']:
                message['role'] = 'assistant'  # type: ignore[arg-type]
        # user/assistant messages should alternate starting with `user`, so
        # promote the second message (the first real user turn) to the front.
        # Guarded to avoid an IndexError on single-message histories.
        if len(messages) > 1:
            messages[0], messages[1] = messages[1], messages[0]

        response = self._client.chat.completions.create(
            messages=messages,
            model=self.model_type.value,
        )
        return response

    def check_model_config(self):
        r"""Check whether the model configuration contains any
        unexpected arguments to OpenAI API.

        Raises:
            ValueError: If the model configuration dictionary contains any
                unexpected arguments to OpenAI API.
        """
        for param in self.model_config_dict:
            if param not in OPENAI_API_PARAMS:
                raise ValueError(
                    f"Unexpected argument `{param}` is "
                    "input into OpenAI model backend."
                )

    @property
    def stream(self) -> bool:
        r"""Returns whether the model is in stream mode, which sends partial
        results each time.

        Returns:
            bool: Whether the model is in stream mode.
        """
        return self.model_config_dict.get('stream', False)
87 changes: 87 additions & 0 deletions camel/models/nvidia_model_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import os
from typing import List, Optional

from openai import OpenAI

from camel.messages import OpenAIMessage
from camel.types import ChatCompletion, ModelType
from camel.utils import (
BaseTokenCounter,
OpenAITokenCounter,
model_api_key_required,
)


class NvidiaModelV2:
    r"""Thin Nvidia API client whose ``run`` method mirrors the
    BaseModelBackend interface.

    NOTE(review): unlike ``NvidiaModel``, this class does *not* inherit from
    ``BaseModelBackend`` and accepts no ``model_config_dict`` — the Nemotron
    models don't support additional model config like OpenAI.
    """

    def __init__(
        self,
        model_type: ModelType,
        api_key: Optional[str] = None,
    ) -> None:
        r"""Constructor for Nvidia backend.

        Args:
            model_type (ModelType): Model for which a backend is created.
            api_key (Optional[str]): The API key for authenticating with the
                Nvidia service. Falls back to the ``NVIDIA_API_KEY``
                environment variable when :obj:`None`.
                (default: :obj:`None`)

        Raises:
            ValueError: If ``NVIDIA_API_BASE_URL`` is unset or no API key
                can be resolved from the argument or environment.
        """
        self.model_type = model_type
        url = os.environ.get('NVIDIA_API_BASE_URL', None)
        self._api_key = api_key or os.environ.get("NVIDIA_API_KEY")
        if not url or not self._api_key:
            raise ValueError(
                "NVIDIA_API_BASE_URL and NVIDIA_API_KEY should be set."
            )
        self._client = OpenAI(
            timeout=60, max_retries=3, base_url=url, api_key=self._api_key
        )
        # Lazily-created token counter; see `token_counter` property.
        self._token_counter: Optional[BaseTokenCounter] = None

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            BaseTokenCounter: The token counter following the model's
                tokenization style.
        """
        if not self._token_counter:
            # NOTE: It's a temporary setting for token counter; the GPT-3.5
            # tokenizer only approximates the Nemotron tokenization.
            self._token_counter = OpenAITokenCounter(ModelType.GPT_3_5_TURBO)
        return self._token_counter

    @model_api_key_required
    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> ChatCompletion:
        r"""Runs inference of OpenAI-style chat completion.

        NOTE(review): roles are forwarded as-is — unlike ``NvidiaModel.run``
        there is no coercion to 'user'/'assistant'; confirm callers already
        normalize roles for Nemotron endpoints.

        Args:
            messages (List[OpenAIMessage]): Message list with the chat
                history in OpenAI API format.

        Returns:
            ChatCompletion: The completion returned by the Nvidia endpoint.
        """
        response = self._client.chat.completions.create(
            messages=messages,
            model=self.model_type.value,
        )
        return response
Loading