Update the subquery syntax used when building the import query, based on the Neo4j version

langchain-ai · Nov 20, 2024 · 466b3bc · 466b3bc
1 parent f619c6f
commit 466b3bc
Show file tree

Hide file tree

Showing 5 changed files with 838 additions and 290 deletions.
diff --git a/libs/neo4j/examples/graph_prompting.py b/libs/neo4j/examples/graph_prompting.py
@@ -0,0 +1,125 @@
+import os
+
+from langchain_neo4j.chains.graph_qa.cypher import GraphCypherQAChain
+from langchain_neo4j.graphs.neo4j_graph import Neo4jGraph
+from langchain_neo4j.vectorstores.neo4j_vector import Neo4jVector
+
+os.environ["NEO4J_URI"] = "bolt://localhost:7687"
+os.environ["NEO4J_USERNAME"] = "neo4j"
+os.environ["NEO4J_PASSWORD"] = "password"
+# NOTE(review): overwrites existing NEO4J_* values; Neo4jGraph() presumably reads them — confirm.
+graph = Neo4jGraph()
+
+# Import movie information
+# (one Movie per CSV row, MERGEd with Person and Genre nodes and their relationships).
+movies_query = """
+LOAD CSV WITH HEADERS FROM 
+'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'
+AS row
+MERGE (m:Movie {id:row.movieId})
+SET m.released = date(row.released),
+    m.title = row.title,
+    m.imdbRating = toFloat(row.imdbRating)
+FOREACH (director in split(row.director, '|') | 
+    MERGE (p:Person {name:trim(director)})
+    MERGE (p)-[:DIRECTED]->(m))
+FOREACH (actor in split(row.actors, '|') | 
+    MERGE (p:Person {name:trim(actor)})
+    MERGE (p)-[:ACTED_IN]->(m))
+FOREACH (genre in split(row.genres, '|') | 
+    MERGE (g:Genre {name:trim(genre)})
+    MERGE (m)-[:IN_GENRE]->(g))
+"""
+# Run the import statement through the graph object.
+graph.query(movies_query)
+# Refresh the schema after the import (presumably so it reflects the new data — confirm).
+graph.refresh_schema()
+
+from langchain_openai import ChatOpenAI
+
+llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
+chain = GraphCypherQAChain.from_llm(  # NOTE(review): never invoked; `chain` is rebound later in this script
+    graph=graph,
+    llm=llm,
+    exclude_types=["Genre"],  # presumably omits Genre from the schema shown to the LLM — confirm
+    verbose=True,
+    allow_dangerous_requests=True,  # explicit opt-in; presumably acknowledges LLM-written Cypher is executed — confirm
+)
+
+examples = [  # few-shot question -> Cypher pairs; each entry has "question" and "query" keys
+    {
+        "question": "How many artists are there?",
+        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
+    },
+    {  # NOTE: "{{" / "}}" are doubled, presumably to survive str.format-style prompt templating
+        "question": "Which actors played in the movie Casino?",
+        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
+    },
+    {
+        "question": "How many movies has Tom Hanks acted in?",
+        "query": "MATCH (a:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
+    },
+    {
+        "question": "List all the genres of the movie Schindler's List",
+        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
+    },
+    {
+        "question": "Which actors have worked in movies from both the comedy and action genres?",
+        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
+    },
+    {
+        "question": "Which directors have made movies with at least three different actors named 'John'?",
+        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
+    },
+    {
+        "question": "Identify movies where directors also played a role in the film.",
+        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
+    },
+    {  # matches :Person — the import query in this script creates no :Actor label
+        "question": "Find the actor with the highest number of movies in the database.",
+        "query": "MATCH (a:Person)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
+    },
+]
+
+from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
+
+example_prompt = PromptTemplate.from_template(  # renders one example as a question/query pair
+    "User input: {question}\nCypher query: {query}"
+)
+prompt = FewShotPromptTemplate(  # NOTE(review): `prompt` is rebound later to a selector-based template
+    examples=examples[:5],  # static choice: always the first five examples
+    example_prompt=example_prompt,
+    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
+    suffix="User input: {question}\nCypher query: ",
+    input_variables=["question", "schema"],  # the placeholders appearing in prefix and suffix
+)
+
+
+from langchain_core.example_selectors import SemanticSimilarityExampleSelector
+from langchain_openai import OpenAIEmbeddings
+
+example_selector = SemanticSimilarityExampleSelector.from_examples(  # built over the full `examples` list
+    examples,
+    OpenAIEmbeddings(),
+    Neo4jVector,  # the vector store class itself is passed, not an instance
+    k=5,  # presumably the number of examples returned per question — confirm
+    input_keys=["question"],  # presumably restricts similarity matching to the question text — confirm
+)
+
+
+prompt = FewShotPromptTemplate(  # rebinds `prompt`; examples now come from `example_selector`
+    example_selector=example_selector,
+    example_prompt=example_prompt,
+    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
+    suffix="User input: {question}\nCypher query: ",
+    input_variables=["question", "schema"],  # the placeholders appearing in prefix and suffix
+)
+llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
+chain = GraphCypherQAChain.from_llm(  # rebinds `chain`, this time with the few-shot Cypher prompt
+    graph=graph,
+    llm=llm,
+    cypher_prompt=prompt,
+    verbose=True,
+    allow_dangerous_requests=True,  # explicit opt-in; presumably acknowledges LLM-written Cypher is executed — confirm
+)
+chain.invoke("How many actors are in the graph?")  # NOTE(review): the return value is discarded
diff --git a/libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py b/libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
@@ -905,17 +905,7 @@ def add_embeddings(
         if not metadatas:
             metadatas = [{} for _ in texts]
 
-        import_query = (
-            "UNWIND $data AS row "
-            "CALL (row) { WITH row "
-            f"MERGE (c:`{self.node_label}` {{id: row.id}}) "
-            "WITH c, row "
-            f"CALL db.create.setNodeVectorProperty(c, "
-            f"'{self.embedding_node_property}', row.embedding) "
-            f"SET c.`{self.text_node_property}` = row.text "
-            "SET c += row.metadata "
-            "} IN TRANSACTIONS OF 1000 ROWS "
-        )
+        import_query = self._build_import_query()
 
         parameters = {
             "data": [
@@ -930,6 +920,32 @@ def add_embeddings(
 
         return ids
 
+    def _build_import_query(self) -> str:
+        """
+        Assemble the Cypher import query for the Neo4j version in use.
+
+        ``row`` is brought into the subquery with ``CALL (row) {`` when
+        ``self.neo4j_version_is_5_23_or_above`` is truthy, else ``WITH row``.
+
+        Returns:
+            str: The constructed Cypher query string.
+        """
+        scoped = self.neo4j_version_is_5_23_or_above
+        subquery_open = "CALL (row) { " if scoped else "CALL { WITH row "
+        fragments = [
+            "UNWIND $data AS row ",
+            subquery_open,
+            f"MERGE (c:`{self.node_label}` {{id: row.id}}) ",
+            "WITH c, row ",
+            "CALL db.create.setNodeVectorProperty(c, ",
+            f"'{self.embedding_node_property}', row.embedding) ",
+            f"SET c.`{self.text_node_property}` = row.text ",
+            "SET c += row.metadata ",
+            "} IN TRANSACTIONS OF 1000 ROWS ",
+        ]
+
+        return "".join(fragments)
+
     def add_texts(
         self,
         texts: Iterable[str],