Skip to content

Commit

Permalink
Merge pull request #4 from the-seeds/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hiyouga authored Jun 18, 2024
2 parents 438700d + cd69c18 commit cc61818
Show file tree
Hide file tree
Showing 18 changed files with 65 additions and 103 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ruff
python -m pip install .[dev]
- name: Check quality
Expand All @@ -45,7 +46,7 @@ jobs:
needs: check_code_quality

env:
OPENAI_BASE_URL: https://ai-yyds.com/v1
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DEFAULT_EMBED_MODEL: text-embedding-ada-002
DEFAULT_CHAT_MODEL: gpt-3.5-turbo
Expand All @@ -67,6 +68,7 @@ jobs:
ports:
- 6379:6379


runs-on: ubuntu-latest

steps:
Expand All @@ -83,21 +85,23 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[dev]
python -m pip install -e .
python -m pip install -r requirements.txt
python -m pip install -r requirements-dev.txt
python -m pip install pytest
wget https://github.com/milvus-io/milvus/releases/download/v2.4.4/milvus-standalone-docker-compose.yml -O docker-compose.yml
sudo docker compose up -d
- name: Test with pytest
run: |
make test
- name: Configure sysctl limits
- name: Configure Elasticsearch
run: |
sudo swapoff -a
sudo sysctl -w vm.swappiness=1
sudo sysctl -w fs.file-max=262144
sudo sysctl -w vm.max_map_count=262144
- name: Runs Elasticsearch
uses: elastic/elastic-github-actions/elasticsearch@master
with:
Expand Down
4 changes: 2 additions & 2 deletions src/cardinal/splitter/text_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ class TextSplitter:
https://github.com/langchain-ai/langchain/blob/v0.1.5/libs/langchain/langchain/text_splitter.py
"""

def __init__(self, chunk_size: Optional[int] = None, chuck_overlap: Optional[int] = None) -> None:
def __init__(self, chunk_size: Optional[int] = None, chunk_overlap: Optional[int] = None) -> None:
self._separators = ["\n\n", "\n", ". ", ", ", " ", ""]
self._chunk_size = chunk_size if chunk_size is not None else settings.default_chunk_size
self._chunk_overlap = chuck_overlap if chuck_overlap is not None else settings.default_chunk_overlap
self._chunk_overlap = chunk_overlap if chunk_overlap is not None else settings.default_chunk_overlap
assert self._chunk_overlap < self._chunk_size, "chunk overlap must be smaller than chunk size"
self._counter = TokenCounter()

Expand Down
3 changes: 0 additions & 3 deletions tests/.idea/.gitignore

This file was deleted.

21 changes: 0 additions & 21 deletions tests/.idea/inspectionProfiles/Project_Default.xml

This file was deleted.

6 changes: 0 additions & 6 deletions tests/.idea/inspectionProfiles/profiles_settings.xml

This file was deleted.

4 changes: 0 additions & 4 deletions tests/.idea/misc.xml

This file was deleted.

8 changes: 0 additions & 8 deletions tests/.idea/modules.xml

This file was deleted.

11 changes: 0 additions & 11 deletions tests/.idea/tests.iml

This file was deleted.

6 changes: 0 additions & 6 deletions tests/.idea/vcs.xml

This file was deleted.

20 changes: 20 additions & 0 deletions tests/collector/test_base_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from pydantic import BaseModel
from typing import List
from cardinal.collector import BaseCollector
from cardinal.common import BaseMessage, AssistantMessage, HumanMessage

class History(BaseModel):
    """Record type collected in this test: a list of chat messages."""

    messages: List[BaseMessage]

def test_base_collector():
    """Check that ``BaseCollector.dump()`` returns collected records in insertion order.

    Two ``History`` records are collected one after the other; the dumped
    results are expected to contain them at positions 0 and 1 respectively.
    """
    collector = BaseCollector[History](storage_name="test", drop_old=True)
    history1 = History(messages=[HumanMessage(content="hi"), AssistantMessage(content="hi there")])
    history2 = History(messages=[HumanMessage(content="foo"), AssistantMessage(content="foo too")])
    try:
        collector.collect(history1)
        collector.collect(history2)
        results = collector.dump()
        assert results[0] == history1
        assert results[1] == history2
    finally:
        # Run the cleanup even when an assertion fails, so a red test does not
        # leave the "test" storage behind for later runs.
        collector._storage.destroy()
14 changes: 0 additions & 14 deletions tests/collector/test_msg_collector.py

This file was deleted.

1 change: 0 additions & 1 deletion tests/model/test_embed_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest


@pytest.mark.skip(reason="no permission")
def test_embed_openai():
embed_openai = EmbedOpenAI()
assert(embed_openai.batch_embed(["This is a test"]) is not None)
Expand Down
7 changes: 3 additions & 4 deletions tests/retriever/test_dense_retriever.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from pydantic import BaseModel
from cardinal.vectorstore import AutoVectorStore
from cardinal.retriever import DenseRetriever
import pytest

class Animal(BaseModel):
name: str
Expand All @@ -10,9 +9,9 @@ class Animal(BaseModel):
data = [Animal(name=text) for text in texts]


@pytest.mark.skip(reason="no permission")
def test_dense_retriever():
vectorstore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
vectorStore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
retriever = DenseRetriever[Animal](vectorstore_name="test", verbose=True)
assert(retriever.retrieve(query="dog", top_k=1)[0] == data[1])
assert(retriever.retrieve(query="dog", top_k=1) == [data[1]])
vectorStore.destroy()

10 changes: 5 additions & 5 deletions tests/retriever/test_hybird_retriever.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from pydantic import BaseModel
from cardinal.vectorstore import AutoVectorStore
from cardinal.retriever import HybridRetriever
import pytest


class Animal(BaseModel):
Expand All @@ -12,11 +11,12 @@ class Animal(BaseModel):
data = [Animal(name=name, color=color) for name, color in animals]


@pytest.mark.skip(reason="no permission")
def test_hybird_retriever():
names = [animal.name for animal in data]
colors = [animal.color for animal in data]
AutoVectorStore[Animal].create(name="test1", texts=names, data=data, drop_old=True)
AutoVectorStore[Animal].create(name="test2", texts=colors, data=data, drop_old=True)
store1 = AutoVectorStore[Animal].create(name="test1", texts=names, data=data, drop_old=True)
store2 = AutoVectorStore[Animal].create(name="test2", texts=colors, data=data, drop_old=True)
retriever = HybridRetriever[Animal](vectorstore_names=["test1", "test2"], verbose=True)
print(retriever.retrieve(query="a pink dog", top_k=2))
assert(retriever.retrieve(query="a pink dog", top_k=2) == [data[2], data[3]])
store1.destroy()
store2.destroy()
6 changes: 4 additions & 2 deletions tests/retriever/test_sparse_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class Document(BaseModel):
def test_sparse_retriever():
storage = AutoStorage[Document](name="test")
storage.insert(keys=["doc1", "doc2"], values=[doc1, doc2])
if ENV_STORAGE == 'es':
storage._storage.database.indices.refresh()
retriever = SparseRetriever(storage_name="test", verbose=True)
assert(retriever.retrieve is not None)

assert(retriever.retrieve(query="alice", top_k=1) == [doc1])
storage.destroy()
2 changes: 1 addition & 1 deletion tests/splitter/test_text_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def test_text_splitter():
splitter = CJKTextSplitter(chunk_size=30, chuck_overlap=10)
splitter = CJKTextSplitter(chunk_size=30, chunk_overlap=10)
text = (
"The document presents FastEdit, a repository aimed at efficiently injecting "
"fresh and customized knowledge into large language models using a single command. "
Expand Down
12 changes: 7 additions & 5 deletions tests/storage/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ class Document(BaseModel):

def test_storage():
storage = AutoStorage[Document](name="test")

assert(not storage.exists()) # False
storage.insert(keys=["doc1", "doc2"], values=[doc1, doc2])
assert(storage.query("doc1")==doc1)
storage.clear()
assert(storage.query("doc1")==None)
assert(storage.exists()) # True
assert(storage.query("doc1") == doc1) # content='I am alice.' title='test'
storage.delete("doc1")
assert(storage.query("doc1") is None) # None
storage.unique_reset()
storage.unique_incr()
storage.unique_incr()
assert(storage.unique_get()==2)
assert(storage.unique_get() == 2) # 2
storage.destroy()

21 changes: 15 additions & 6 deletions tests/vectorstore/test_vector_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from cardinal.vectorstore import AutoVectorStore, AutoCondition
from pydantic import BaseModel
from enum import IntEnum
import os
import pytest


Expand All @@ -24,10 +25,18 @@ class Animal(BaseModel):
data = [Animal(name=text) for text in texts]


@pytest.mark.skip(reason="no permission")
def test_vector_store():
data = [Animal(name=text) for text in texts]
vecstore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
vecstore.delete(AutoCondition(key="name", value="dog", op=Operator.Eq))
print(vecstore.search(query="dog", top_k=2))

vectorStore = AutoVectorStore[Animal](name="test")
ENV_VECTORSTORE = os.getenv('VECTORSTORE')
assert(not vectorStore.exists()) # False
vectorStore.insert(texts=texts, data=data)
if ENV_VECTORSTORE == 'milvus':
vectorStore._vectorstore.store.flush()
vectorStore.delete(AutoCondition(key="name", value="dog", op=Operator.Eq))
if ENV_VECTORSTORE == 'milvus':
vectorStore._vectorstore.store.flush()
assert(vectorStore.search(query="dog", top_k=2)[0][0] == data[2])
assert(vectorStore.search(query="dog", top_k=2)[1][0] == data[1])
# [(Animal(name='puppy'), 0.8510237336158752), (Animal(name='llama'), 1.1970627307891846)]
assert(vectorStore.exists()) # True
vectorStore.destroy()

0 comments on commit cc61818

Please sign in to comment.