Skip to content

Commit

Permalink
feat: Add max_characters setting to RAG (#895)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wendong-Fan authored Sep 5, 2024
1 parent ded71b8 commit bdd69fa
Show file tree
Hide file tree
Showing 8 changed files with 16 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ body:
attributes:
label: What version of camel are you using?
description: Run command `python3 -c 'print(__import__("camel").__version__)'` in your shell and paste the output here.
placeholder: E.g., 0.1.6.8
placeholder: E.g., 0.1.6.9
validations:
required: true

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ conda create --name camel python=3.10
conda activate camel
# Clone github repo
git clone -b v0.1.6.8 https://github.com/camel-ai/camel.git
git clone -b v0.1.6.9 https://github.com/camel-ai/camel.git
# Change directory into project directory
cd camel
Expand Down
2 changes: 1 addition & 1 deletion camel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========

__version__ = '0.1.6.8'
__version__ = '0.1.6.9'

__all__ = [
'__version__',
Expand Down
5 changes: 4 additions & 1 deletion camel/retrievers/auto_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ def run_vector_retriever(
top_k: int = DEFAULT_TOP_K_RESULTS,
similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
return_detailed_info: bool = False,
max_characters: int = 500,
) -> dict[str, Sequence[Collection[str]]]:
r"""Executes the automatic vector retriever process using vector
storage.
Expand All @@ -198,6 +199,8 @@ def run_vector_retriever(
return_detailed_info (bool, optional): Whether to return detailed
information including similarity score, content path and
metadata. Defaults to `False`.
max_characters (int, optional): Maximum number of characters in
each chunk. Defaults to `500`.
Returns:
dict[str, Sequence[Collection[str]]]: By default, returns
Expand Down Expand Up @@ -262,7 +265,7 @@ def run_vector_retriever(
storage=vector_storage_instance,
embedding_model=self.embedding_model,
)
vr.process(content)
vr.process(content=content, max_characters=max_characters)
else:
vr = VectorRetriever(
storage=vector_storage_instance,
Expand Down
7 changes: 6 additions & 1 deletion camel/retrievers/vector_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def process(
self,
content: Union[str, Element],
chunk_type: str = "chunk_by_title",
max_characters: int = 500,
**kwargs: Any,
) -> None:
r"""Processes content from a file or URL, divides it into chunks by
Expand All @@ -87,6 +88,8 @@ def process(
string content or Element object.
chunk_type (str): Type of chunking to apply. Defaults to
"chunk_by_title".
max_characters (int): Max number of characters in each chunk.
Defaults to `500`.
**kwargs (Any): Additional keyword arguments for content parsing.
"""
if isinstance(content, Element):
Expand All @@ -101,7 +104,9 @@ def process(
elements = [self.uio.create_element_from_text(text=content)]
if elements:
chunks = self.uio.chunk_elements(
chunk_type=chunk_type, elements=elements
chunk_type=chunk_type,
elements=elements,
max_characters=max_characters,
)
if not elements:
warnings.warn(
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
project = 'CAMEL'
copyright = '2023, CAMEL-AI.org'
author = 'CAMEL-AI.org'
release = '0.1.6.8'
release = '0.1.6.9'

html_favicon = (
'https://raw.githubusercontent.com/camel-ai/camel/master/misc/favicon.png'
Expand Down
2 changes: 1 addition & 1 deletion docs/get_started/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ conda create --name camel python=3.10
conda activate camel
# Clone github repo
git clone -b v0.1.6.8 https://github.com/camel-ai/camel.git
git clone -b v0.1.6.9 https://github.com/camel-ai/camel.git
# Change directory into project directory
cd camel
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "camel-ai"
version = "0.1.6.8"
version = "0.1.6.9"
authors = ["CAMEL-AI.org"]
description = "Communicative Agents for AI Society Study"
readme = "README.md"
Expand Down

0 comments on commit bdd69fa

Please sign in to comment.