diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 3f50bd1..20e1dd2 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -35,6 +35,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
+ python -m pip install ruff
python -m pip install .[dev]
- name: Check quality
@@ -45,7 +46,7 @@ jobs:
needs: check_code_quality
env:
- OPENAI_BASE_URL: https://ai-yyds.com/v1
+ OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DEFAULT_EMBED_MODEL: text-embedding-ada-002
DEFAULT_CHAT_MODEL: gpt-3.5-turbo
@@ -67,6 +68,7 @@ jobs:
ports:
- 6379:6379
+
runs-on: ubuntu-latest
steps:
@@ -83,7 +85,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- python -m pip install .[dev]
+ python -m pip install -e .
+ python -m pip install -r requirements.txt
+ python -m pip install -r requirements-dev.txt
+ python -m pip install pytest
wget https://github.com/milvus-io/milvus/releases/download/v2.4.4/milvus-standalone-docker-compose.yml -O docker-compose.yml
sudo docker compose up -d
@@ -91,13 +96,12 @@ jobs:
run: |
make test
- - name: Configure sysctl limits
+ - name: Configure Elasticsearch
run: |
sudo swapoff -a
sudo sysctl -w vm.swappiness=1
sudo sysctl -w fs.file-max=262144
sudo sysctl -w vm.max_map_count=262144
-
- name: Runs Elasticsearch
uses: elastic/elastic-github-actions/elasticsearch@master
with:
diff --git a/src/cardinal/splitter/text_splitter.py b/src/cardinal/splitter/text_splitter.py
index c62c151..83532d2 100644
--- a/src/cardinal/splitter/text_splitter.py
+++ b/src/cardinal/splitter/text_splitter.py
@@ -15,10 +15,10 @@ class TextSplitter:
https://github.com/langchain-ai/langchain/blob/v0.1.5/libs/langchain/langchain/text_splitter.py
"""
- def __init__(self, chunk_size: Optional[int] = None, chuck_overlap: Optional[int] = None) -> None:
+ def __init__(self, chunk_size: Optional[int] = None, chunk_overlap: Optional[int] = None) -> None:
self._separators = ["\n\n", "\n", ". ", ", ", " ", ""]
self._chunk_size = chunk_size if chunk_size is not None else settings.default_chunk_size
- self._chunk_overlap = chuck_overlap if chuck_overlap is not None else settings.default_chunk_overlap
+ self._chunk_overlap = chunk_overlap if chunk_overlap is not None else settings.default_chunk_overlap
assert self._chunk_overlap < self._chunk_size, "chunk overlap must be larger than chunk size"
self._counter = TokenCounter()
diff --git a/tests/.idea/.gitignore b/tests/.idea/.gitignore
deleted file mode 100644
index 26d3352..0000000
--- a/tests/.idea/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
diff --git a/tests/.idea/inspectionProfiles/Project_Default.xml b/tests/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 4f00b2d..0000000
--- a/tests/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/.idea/inspectionProfiles/profiles_settings.xml b/tests/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/tests/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/.idea/misc.xml b/tests/.idea/misc.xml
deleted file mode 100644
index 7e83473..0000000
--- a/tests/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/tests/.idea/modules.xml b/tests/.idea/modules.xml
deleted file mode 100644
index dac5cbb..0000000
--- a/tests/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/.idea/tests.iml b/tests/.idea/tests.iml
deleted file mode 100644
index 68a3566..0000000
--- a/tests/.idea/tests.iml
+++ /dev/null
@@ -1,11 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/.idea/vcs.xml b/tests/.idea/vcs.xml
deleted file mode 100644
index 6c0b863..0000000
--- a/tests/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/collector/test_base_collector.py b/tests/collector/test_base_collector.py
new file mode 100644
index 0000000..6a7cad0
--- /dev/null
+++ b/tests/collector/test_base_collector.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel
+from typing import List
+from cardinal.collector import BaseCollector
+from cardinal.common import BaseMessage, AssistantMessage, HumanMessage
+
+class History(BaseModel):
+ messages: List[BaseMessage]
+
+def test_base_collector():
+ collector = BaseCollector[History](storage_name="test", drop_old=True)
+ messages = [HumanMessage(content="hi"), AssistantMessage(content="hi there")]
+ history1 = History(messages=messages)
+ collector.collect(history1)
+ messages = [HumanMessage(content="foo"), AssistantMessage(content="foo too")]
+ history2 = History(messages=messages)
+ collector.collect(history2)
+ results = collector.dump()
+ assert(results[0] == history1)
+ assert(results[1] == history2)
+ collector._storage.destroy()
diff --git a/tests/collector/test_msg_collector.py b/tests/collector/test_msg_collector.py
deleted file mode 100644
index 49fab45..0000000
--- a/tests/collector/test_msg_collector.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from cardinal.collector import MsgCollector
-from cardinal.common import HumanMessage, AssistantMessage
-
-def test_msg_collector():
- collector = MsgCollector(storage_name="test", drop_old=True)
- messages = [HumanMessage(content="hi"), AssistantMessage(content="hi there")]
- collector.collect(messages)
- messages = [HumanMessage(content="foo"), AssistantMessage(content="foo too")]
- collector.collect(messages)
- results = collector.dump()
- assert(results[0][0].content=='hi')
- assert(results[0][1].content=='hi there')
- assert(results[1][0].content=='foo')
- assert(results[1][1].content=='foo too')
diff --git a/tests/model/test_embed_openai.py b/tests/model/test_embed_openai.py
index a1866fb..10c6f5d 100644
--- a/tests/model/test_embed_openai.py
+++ b/tests/model/test_embed_openai.py
@@ -2,7 +2,6 @@
import pytest
-@pytest.mark.skip(reason="no permission")
def test_embed_openai():
embed_openai = EmbedOpenAI()
assert(embed_openai.batch_embed(["This is a test"]) is not None)
diff --git a/tests/retriever/test_dense_retriever.py b/tests/retriever/test_dense_retriever.py
index 004a1da..0968828 100644
--- a/tests/retriever/test_dense_retriever.py
+++ b/tests/retriever/test_dense_retriever.py
@@ -1,7 +1,6 @@
from pydantic import BaseModel
from cardinal.vectorstore import AutoVectorStore
from cardinal.retriever import DenseRetriever
-import pytest
class Animal(BaseModel):
name: str
@@ -10,9 +9,9 @@ class Animal(BaseModel):
data = [Animal(name=text) for text in texts]
-@pytest.mark.skip(reason="no permission")
def test_dense_retriever():
- vectorstore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
+ vectorStore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
retriever = DenseRetriever[Animal](vectorstore_name="test", verbose=True)
- assert(retriever.retrieve(query="dog", top_k=1)[0] == data[1])
+ assert(retriever.retrieve(query="dog", top_k=1) == [data[1]])
+ vectorStore.destroy()
\ No newline at end of file
diff --git a/tests/retriever/test_hybird_retriever.py b/tests/retriever/test_hybird_retriever.py
index 66b3d4a..751b9a7 100644
--- a/tests/retriever/test_hybird_retriever.py
+++ b/tests/retriever/test_hybird_retriever.py
@@ -1,7 +1,6 @@
from pydantic import BaseModel
from cardinal.vectorstore import AutoVectorStore
from cardinal.retriever import HybridRetriever
-import pytest
class Animal(BaseModel):
@@ -12,11 +11,12 @@ class Animal(BaseModel):
data = [Animal(name=name, color=color) for name, color in animals]
-@pytest.mark.skip(reason="no permission")
def test_hybird_retriever():
names = [animal.name for animal in data]
colors = [animal.color for animal in data]
- AutoVectorStore[Animal].create(name="test1", texts=names, data=data, drop_old=True)
- AutoVectorStore[Animal].create(name="test2", texts=colors, data=data, drop_old=True)
+ store1 = AutoVectorStore[Animal].create(name="test1", texts=names, data=data, drop_old=True)
+ store2 = AutoVectorStore[Animal].create(name="test2", texts=colors, data=data, drop_old=True)
retriever = HybridRetriever[Animal](vectorstore_names=["test1", "test2"], verbose=True)
- print(retriever.retrieve(query="a pink dog", top_k=2))
+ assert(retriever.retrieve(query="a pink dog", top_k=2) == [data[2], data[3]])
+ store1.destroy()
+ store2.destroy()
\ No newline at end of file
diff --git a/tests/retriever/test_sparse_retriever.py b/tests/retriever/test_sparse_retriever.py
index 3a0c373..a9f79a3 100644
--- a/tests/retriever/test_sparse_retriever.py
+++ b/tests/retriever/test_sparse_retriever.py
@@ -20,6 +20,8 @@ class Document(BaseModel):
def test_sparse_retriever():
storage = AutoStorage[Document](name="test")
storage.insert(keys=["doc1", "doc2"], values=[doc1, doc2])
+ if ENV_STORAGE == 'es':
+ storage._storage.database.indices.refresh()
retriever = SparseRetriever(storage_name="test", verbose=True)
- assert(retriever.retrieve is not None)
-
\ No newline at end of file
+ assert(retriever.retrieve(query="alice", top_k=1) == [doc1])
+ storage.destroy()
\ No newline at end of file
diff --git a/tests/splitter/test_text_splitter.py b/tests/splitter/test_text_splitter.py
index e8c1128..3e04f06 100644
--- a/tests/splitter/test_text_splitter.py
+++ b/tests/splitter/test_text_splitter.py
@@ -2,7 +2,7 @@
def test_text_splitter():
- splitter = CJKTextSplitter(chunk_size=30, chuck_overlap=10)
+ splitter = CJKTextSplitter(chunk_size=30, chunk_overlap=10)
text = (
"The document presents FastEdit, a repository aimed at efficiently injecting "
"fresh and customized knowledge into large language models using a single command. "
diff --git a/tests/storage/test_storage.py b/tests/storage/test_storage.py
index e36de7d..44aefa7 100644
--- a/tests/storage/test_storage.py
+++ b/tests/storage/test_storage.py
@@ -12,13 +12,15 @@ class Document(BaseModel):
def test_storage():
storage = AutoStorage[Document](name="test")
-
+ assert(not storage.exists()) # False
storage.insert(keys=["doc1", "doc2"], values=[doc1, doc2])
- assert(storage.query("doc1")==doc1)
- storage.clear()
- assert(storage.query("doc1")==None)
+ assert(storage.exists()) # True
+ assert(storage.query("doc1") == doc1) # content='I am alice.' title='test'
+ storage.delete("doc1")
+ assert(storage.query("doc1") is None) # None
storage.unique_reset()
storage.unique_incr()
storage.unique_incr()
- assert(storage.unique_get()==2)
+ assert(storage.unique_get() == 2) # 2
+ storage.destroy()
\ No newline at end of file
diff --git a/tests/vectorstore/test_vector_store.py b/tests/vectorstore/test_vector_store.py
index e60f843..8255b62 100644
--- a/tests/vectorstore/test_vector_store.py
+++ b/tests/vectorstore/test_vector_store.py
@@ -1,6 +1,7 @@
from cardinal.vectorstore import AutoVectorStore, AutoCondition
from pydantic import BaseModel
from enum import IntEnum
+import os
import pytest
@@ -24,10 +25,18 @@ class Animal(BaseModel):
data = [Animal(name=text) for text in texts]
-@pytest.mark.skip(reason="no permission")
def test_vector_store():
- data = [Animal(name=text) for text in texts]
- vecstore = AutoVectorStore[Animal].create(name="test", texts=texts, data=data, drop_old=True)
- vecstore.delete(AutoCondition(key="name", value="dog", op=Operator.Eq))
- print(vecstore.search(query="dog", top_k=2))
-
\ No newline at end of file
+ vectorStore = AutoVectorStore[Animal](name="test")
+ ENV_VECTORSTORE = os.getenv('VECTORSTORE')
+ assert(not vectorStore.exists()) # False
+ vectorStore.insert(texts=texts, data=data)
+ if ENV_VECTORSTORE == 'milvus':
+ vectorStore._vectorstore.store.flush()
+ vectorStore.delete(AutoCondition(key="name", value="dog", op=Operator.Eq))
+ if ENV_VECTORSTORE == 'milvus':
+ vectorStore._vectorstore.store.flush()
+ assert(vectorStore.search(query="dog", top_k=2)[0][0] == data[2])
+ assert(vectorStore.search(query="dog", top_k=2)[1][0] == data[1])
+ # [(Animal(name='puppy'), 0.8510237336158752), (Animal(name='llama'), 1.1970627307891846)]
+ assert(vectorStore.exists()) # True
+ vectorStore.destroy()