Skip to content

Commit

Permalink
WIP: e2e tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stellasia committed Dec 30, 2024
1 parent 0f5616c commit b4f59e4
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 9 deletions.
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ json-repair = "^0.30.2"
types-pyyaml = "^6.0.12.20240917"
ollama = {version = "^0.4.4", optional = true}
uuid = "^1.30"
weaviate = "^0.1.2"

[tool.poetry.group.dev.dependencies]
urllib3 = "<2"
Expand Down
6 changes: 6 additions & 0 deletions tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def driver() -> Generator[Any, Any, Any]:
driver.close()


@pytest.fixture(scope="function", autouse=True)
def clear_db(driver: Driver) -> Any:
driver.execute_query("MATCH (n) DETACH DELETE n")
yield


@pytest.fixture(scope="function")
def llm() -> MagicMock:
return MagicMock(spec=LLMInterface)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@
from neo4j_graphrag.llm import LLMResponse


@pytest.fixture(scope="function", autouse=True)
def clear_db(driver: neo4j.Driver) -> Any:
driver.execute_query("MATCH (n) DETACH DELETE n")
yield


@pytest.mark.asyncio
async def test_pipeline_from_json_config(harry_potter_text: str, driver: Mock) -> None:
os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
Expand Down
147 changes: 145 additions & 2 deletions tests/e2e/test_simplekgpipeline_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,148 @@ async def test_pipeline_builder_happy_path(
)

# Run the knowledge graph building process with text input
text_input = "John Doe lives in New York City."
await kg_builder_text.run_async(text=text_input)
await kg_builder_text.run_async(text=harry_potter_text)



@pytest.mark.asyncio
@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
async def test_pipeline_builder_two_documents(
harry_potter_text_part1: str,
harry_potter_text_part2: str,
llm: MagicMock,
embedder: MagicMock,
driver: neo4j.Driver,
) -> None:
"""When everything works as expected, extracted entities, relations and text
chunks must be in the DB
"""
driver.execute_query("MATCH (n) DETACH DELETE n")
embedder.embed_query.return_value = [1, 2, 3]
llm.ainvoke.side_effect = [
# first document
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
# second document
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Hermione Granger"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
]

# Create an instance of the SimpleKGPipeline
kg_builder_text = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
from_pdf=False,
)

# Run the knowledge graph building process with text input
await kg_builder_text.run_async(text=harry_potter_text_part1)
await kg_builder_text.run_async(text=harry_potter_text_part2)

# check graph content
records, _, _ = driver.execute_query("MATCH (n) RETURN n")
print(records)

assert False


@pytest.mark.asyncio
@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
async def test_pipeline_builder_same_document_two_runs(
harry_potter_text_part1: str,
llm: MagicMock,
embedder: MagicMock,
driver: neo4j.Driver,
) -> None:
"""When everything works as expected, extracted entities, relations and text
chunks must be in the DB
"""
driver.execute_query("MATCH (n) DETACH DELETE n")
embedder.embed_query.return_value = [1, 2, 3]
llm.ainvoke.side_effect = [
# first run
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
# second run
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
]

# Create an instance of the SimpleKGPipeline
kg_builder_text = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
from_pdf=False,
)

# Run the knowledge graph building process with text input
await kg_builder_text.run_async(text=harry_potter_text_part1)
await kg_builder_text.run_async(text=harry_potter_text_part1)

# check graph content
records, _, _ = driver.execute_query("MATCH (n) RETURN n")
print(records)

assert False

0 comments on commit b4f59e4

Please sign in to comment.