-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make PdfLoader work with config files
- Loading branch information
Showing
6 changed files
with
194 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
examples/build_graph/from_config_files/simple_kg_pipeline_config_url.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
{ | ||
"version_": "1", | ||
"template_": "SimpleKGPipeline", | ||
"neo4j_config": { | ||
"params_": { | ||
"uri": { | ||
"resolver_": "ENV", | ||
"var_": "NEO4J_URI" | ||
}, | ||
"user": { | ||
"resolver_": "ENV", | ||
"var_": "NEO4J_USER" | ||
}, | ||
"password": { | ||
"resolver_": "ENV", | ||
"var_": "NEO4J_PASSWORD" | ||
} | ||
} | ||
}, | ||
"llm_config": { | ||
"class_": "OpenAILLM", | ||
"params_": { | ||
"api_key": { | ||
"resolver_": "ENV", | ||
"var_": "OPENAI_API_KEY" | ||
}, | ||
"model_name": "gpt-4o", | ||
"model_params": { | ||
"temperature": 0, | ||
"max_tokens": 2000, | ||
"response_format": {"type": "json_object"} | ||
} | ||
} | ||
}, | ||
"embedder_config": { | ||
"class_": "OpenAIEmbeddings", | ||
"params_": { | ||
"api_key": { | ||
"resolver_": "ENV", | ||
"var_": "OPENAI_API_KEY" | ||
} | ||
} | ||
}, | ||
"from_pdf": true, | ||
"entities": [ | ||
"Person", | ||
{ | ||
"label": "House", | ||
"description": "Family the person belongs to", | ||
"properties": [ | ||
{ | ||
"name": "name", | ||
"type": "STRING" | ||
} | ||
] | ||
}, | ||
{ | ||
"label": "Planet", | ||
"properties": [ | ||
{ | ||
"name": "name", | ||
"type": "STRING" | ||
}, | ||
{ | ||
"name": "weather", | ||
"type": "STRING" | ||
} | ||
] | ||
} | ||
], | ||
"relations": [ | ||
"PARENT_OF", | ||
{ | ||
"label": "HEIR_OF", | ||
"description": "Used for inheritor relationship between father and sons" | ||
}, | ||
{ | ||
"label": "RULES", | ||
"properties": [ | ||
{ | ||
"name": "fromYear", | ||
"type": "INTEGER" | ||
} | ||
] | ||
} | ||
], | ||
"potential_schema": [ | ||
[ | ||
"Person", | ||
"PARENT_OF", | ||
"Person" | ||
], | ||
[ | ||
"Person", | ||
"HEIR_OF", | ||
"House" | ||
], | ||
[ | ||
"House", | ||
"RULES", | ||
"Planet" | ||
] | ||
], | ||
"text_splitter": { | ||
"class_": "text_splitters.fixed_size_splitter.FixedSizeSplitter", | ||
"params_": { | ||
"chunk_size": 100, | ||
"chunk_overlap": 10 | ||
} | ||
}, | ||
"pdf_loader": { | ||
"class_": "pdf_loader.PdfLoader", | ||
"run_params_": { | ||
"fs": "http" | ||
} | ||
}, | ||
"perform_entity_resolution": true | ||
} |
45 changes: 45 additions & 0 deletions
45
examples/build_graph/from_config_files/simple_kg_pipeline_from_config_file_with_url.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""In this example, the pipeline is defined in a JSON file | ||
('simple_kg_pipeline_config_url.json') and the PDF to process is loaded from a URL. | ||
According to the configuration file, some parameters will be read from the env vars | ||
(Neo4j credentials and the OpenAI API key). | ||
""" | ||
|
||
import asyncio | ||
import logging | ||
|
||
## If env vars are in a .env file, uncomment: | ||
## (requires pip install python-dotenv) | ||
# from dotenv import load_dotenv | ||
# load_dotenv() | ||
# env vars manually set for testing: | ||
import os | ||
from pathlib import Path | ||
|
||
from neo4j_graphrag.experimental.pipeline.config.runner import PipelineRunner | ||
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult | ||
|
||
logging.basicConfig() | ||
logging.getLogger("neo4j_graphrag").setLevel(logging.DEBUG) | ||
|
||
# Fallback connection settings for a local Neo4j instance.
# `setdefault` keeps any value that is already present in the environment
# (exported in the shell, or loaded from a .env file via `load_dotenv()`)
# instead of silently overwriting it with these testing values.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USER", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")
# The OpenAI API key has no sensible default: export it in the environment,
# or uncomment the next line and fill in a real key.
# os.environ["OPENAI_API_KEY"] = "sk-..."
|
||
|
||
root_dir = Path(__file__).parent | ||
file_path = root_dir / "simple_kg_pipeline_config_url.json" | ||
|
||
|
||
# File to process | ||
URL = "https://raw.githubusercontent.com/neo4j/neo4j-graphrag-python/c166afc4d5abc56a5686f3da46a97ed7c07da19d/examples/data/Harry%20Potter%20and%20the%20Chamber%20of%20Secrets%20Summary.pdf" | ||
|
||
|
||
async def main() -> PipelineResult:
    """Build the pipeline described by ``file_path`` and run it on ``URL``.

    The runner is created from the configuration file, then executed with the
    remote document location passed as the ``file_path`` run parameter.

    Returns:
        The result object produced by the pipeline run.
    """
    runner = PipelineRunner.from_config_file(file_path)
    run_inputs = {"file_path": URL}
    outcome = await runner.run(run_inputs)
    return outcome
|
||
|
||
if __name__ == "__main__": | ||
print(asyncio.run(main())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters