From 043b6128c276f19769e24587ed7258951a58e848 Mon Sep 17 00:00:00 2001 From: jstzwj <1103870790@qq.com> Date: Wed, 27 Dec 2023 03:40:33 +0800 Subject: [PATCH] update readme --- README.md | 25 +++++++++++++++++++++---- chatproto/registry.py | 2 ++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6be580c..1596b98 100644 --- a/README.md +++ b/README.md @@ -3,21 +3,38 @@ Large Language Model Chat Protocol. The different chat prompt formats used by different Large Language Models have been a problem for developers. We developed `chatproto` to output the prompt format for different LLMs through a unified interface. +Compared to the `apply_chat_template` method in HuggingFace Transformers and the conversation templates in FastChat, `chatproto` can also locate the position of each message after applying the template. This makes it very convenient for us to mask out certain messages during training. + ## Quick Start ```python from chatproto.conversation.history import ConversationHistory -from chatproto.conversation.models.baichuan import baichuan +from chatproto.registry import list_conv_settings, get_conv_settings + +# Print all available settings +all_settings = list_conv_settings() +print(all_settings) +settings = get_conv_settings("openbuddy") history = ConversationHistory( "SYSTEM_MESSAGE", messages=[ - (baichuan.roles[0], "user"), - (baichuan.roles[1], "assistant"), + (settings.roles[0], "Hello!"), + (settings.roles[1], "Hello! How can I assist you today?"), ], offset=0, - settings=baichuan + settings=settings ) +# Apply the template print(history.get_prompt()) + +# Get prompt and indices +prompt, indices = history.get_prompt_and_indices() +# Print the start and end offsets of each message in the conversation one by one. +# The start and end offsets here refer to the offsets in the text, not the tokens. +# They do not include any additional characters added in the template. 
+system_start, system_end = indices[0] +for i, (conv_start, conv_end) in enumerate(indices[1:]): + print((conv_start, conv_end)) ``` ## Install diff --git a/chatproto/registry.py b/chatproto/registry.py index e1bf3ef..aa1e97b 100644 --- a/chatproto/registry.py +++ b/chatproto/registry.py @@ -22,6 +22,8 @@ def get_conv_settings(name: str) -> ConversationSettings: """Get a ConversationSettings template.""" return conv_settings[name].copy() +def list_conv_settings() -> List[str]: + return list(conv_settings.keys()) settings_path = os.path.join(os.path.dirname(__file__), "conversation", "models") for module_loader, name, ispkg in pkgutil.iter_modules([settings_path]):