diff --git a/python/empyrical_evidences/README.md b/python/empyrical_evidences/README.md index 725b0daf..7bf89685 100644 --- a/python/empyrical_evidences/README.md +++ b/python/empyrical_evidences/README.md @@ -7,15 +7,15 @@ ## What It Does -This application searches Hacker News for comments relevant to your favorite academic papers. -Specifically, it uses the Together.ai inference APIs to identify key topics in papers, then searches Hacker News for related comments and ranks them. -It is serverlessly hosted on DBOS Cloud. +This application lets you interact with your favorite academic papers. +You can either ask a question to the paper, or search for comments on Hacker News related to the paper. +The application uses Together.ai inference APIs for both features and is serverlessly hosted on DBOS Cloud. ## How It Works - First, you upload an academic paper. The app uses the Together.ai API to query the paper's embeddings and store them in Postgres using pgvector. -- Then, you search Hacker News for comments relevant to the paper. - The app does this with a multi-agent workflow that first extracts the paper's key topics, then searches Hacker News for relevant comments, then ranks the comments. +- Then, you can either ask a question to the paper or search Hacker News relevant comments. +- Searching for comments on Hacker News is a multi-agents workflow that first extracts the paper's key topics, searches Hacker News for relevant comments, and ranks the comments. ## Why Use DBOS @@ -85,22 +85,38 @@ dbos start ## Usage +The application exposes a simple frontend. Locally it default to `localhost:8000/`. +This section documents the API usage. + ### Uploading A Paper Call the `/uploadPaper` endpoint with query parameters `paper_url` (must be base64 encoded) and `paper_tile`. For example: ```bash -curl "localhost:8000/uploadPaper?paper_url=aHR0cHM6Ly9wZW9wbGUuY3NhaWwubWl0LmVkdS90ZGFuZm9yZC82ODMwcGFwZXJzL3N0b25lYnJha2VyLWNzdG9yZS5wZGYK&paper_title=cstore" +curl "localhost:8000/uploadPaper?paper_url=aHR0cHM6Ly9wZW9wbGUuY3NhaWwubWl0LmVkdS90ZGFuZm9yZC82ODMwcGFwZXJzL3N0b25lYnJha2VyLWNzdG9yZS5wZGYK&paper_name=cstore" ``` -This will return a unique identifier for the paper. You will use that ID for your search. +### Ask a question to the paper +Call the `/askPaper` endpoint and set `paper_name` and `question` in the payload. For example: + +```bash +curl -X POST "localhost:8000/askPaper" -d '{"paper_name": "cstore", "question": "What is the main idea of the paper?"}' -H "Content-Type: application/json" +``` ### Search Hacker News comments and rank them Call the `startSearch` endpoint with query parameter `paper_id`. For example: ```bash -curl "localhost:8000/searchPaper?paper_id=c75178c7-7168-497b-a41f-381d8a557270 +curl "localhost:8000/startSearch?paper_name=cstore" ``` -The response will be in JSON. 
+The response, a list of comments for each topic, will be in JSON and the schema is: +```json +[{ + "topic", + "comment", + "url", + "story_title", +}] +``` diff --git a/python/empyrical_evidences/empyrical/main.py b/python/empyrical_evidences/empyrical/main.py index 5a6f19f8..3fac5689 100644 --- a/python/empyrical_evidences/empyrical/main.py +++ b/python/empyrical_evidences/empyrical/main.py @@ -1,36 +1,59 @@ import datetime import html +import os import re from typing import List, Dict import requests import uuid -import base64 from io import BytesIO -from dbos import DBOS, SetWorkflowUUID -from fastapi import FastAPI from PyPDF2 import PdfReader from sqlalchemy.engine import create_engine +from sqlalchemy import URL +from pydantic import BaseModel + from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_together import TogetherEmbeddings, ChatTogether from langchain_postgres.vectorstores import PGVector from langchain_community.vectorstores import DistanceStrategy from langchain_core.runnables import RunnablePassthrough from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser + +from utils import decode_paper_url + +# Import FastAPI to serve requests +from fastapi import FastAPI, HTTPException +from fastapi.responses import HTMLResponse + +# Import Together Python SDK and the Langchain Together.ai integration +from langchain_together import TogetherEmbeddings, ChatTogether from together import Together +# Import DBOS lightweight annotations +from dbos import DBOS, SetWorkflowID, load_config + +# Import the sqlalchemy schema representing papers metadata from schema import papers_metadata -# We will use FastAPI to expose the application to the web +# First, we'll create a FastAPI app and a DBOS instance +# The app will expose endpoints to upload and search for papers +# The DBOS instance will manage durable execution app = FastAPI() -# Configure a DBOS instance dbos = DBOS(fastapi=app) +dbos_config = load_config() -# Configure a Postgres vector store to use BERT embeddings, served by Together.ai +# Now, let's setup a vector store to store embeddings +# We will use BERT, served by Together.ai, and postgres/pgvector as a vector store embeddings = TogetherEmbeddings( model="togethercomputer/m2-bert-80M-8k-retrieval", ) -db_url = dbos.app_db.engine.url +db_url = URL.create( + "postgresql", + username=dbos_config["database"]["username"], + password=dbos_config["database"]["password"], + host=dbos_config["database"]["hostname"], + port=dbos_config["database"]["port"], + database=dbos_config["database"]["app_db_name"], +) db_engine = create_engine(db_url) vector_store = PGVector( embeddings=embeddings, @@ -41,45 +64,34 @@ create_extension=True, ) -# ChatTogether will let us query Together.ai for exploring a paper's topics. -model = ChatTogether( - model="mistralai/Mixtral-8x7B-Instruct-v0.1", -) - ####################### #### UPLOAD PAPERS #### ####################### -# Expose an endpoint to upload a paper. @app.get() is a FastAPI decorator that maps a URL to a function. -# The handler will invoke a DBOS workflow (upload_paper_workflow) block until the workflow completes, then return its result. 
+# Let's program an endpoint to upload papers
+# @app.get() is a FastAPI decorator that maps a URL to `upload_paper()`
+# `upload_paper()` synchronously invokes a DBOS workflow (upload_paper_workflow), then returns its result
 @app.get("/uploadPaper")
-def upload_paper(paper_url: str, paper_title: str):
-    paper_id = uuid.uuid4()
-    with SetWorkflowUUID(str(uuid.uuid4())):
-        handle = dbos.start_workflow(upload_paper_workflow, paper_url, paper_title, paper_id)
+def upload_paper(paper_url: str, paper_name: str):
+    with SetWorkflowID(str(uuid.uuid4())):
+        handle = dbos.start_workflow(upload_paper_workflow, paper_url, paper_name)
     return handle.get_result()

-# Register a DBOS workflow. The workflow does three things:
+# Let's register a DBOS workflow. It does three things:
 # 1. Record the paper metadata in the database (exactly once, using a DBOS 'transaction')
-# 2. Download the paper from the URL (at least once, using a DBOS 'step')
-# 3. Store the paper embeddings in the vector store (at least once, using a DBOS 'step'. Note this could be an exactly-once transaction if we could manage the PGVector connection.)
-# DBOS workflows are resilient to failure: if an error occurs, the workflow will resume exactly where it left off.
+# 2. Download the paper (at least once, using a DBOS 'step')
+# 3. Store the paper embeddings in the vector store (at least once, using a DBOS 'step')
+# DBOS workflows are resilient to failure: if an error occurs, the workflow will resume exactly where it left off
 @dbos.workflow()
-def upload_paper_workflow(paper_url: str, paper_title: str, paper_id: uuid.UUID):
+def upload_paper_workflow(paper_url: str, paper_name: str) -> dict:
     compensation_actions = []

-    # Decode URL from base64. We expect base64 because we encode the paper URL in the endpoint's URL.
-    # Ensure the string is properly padded and replace + and / with - and _
-    # Note: this fails for some PDFs. Turns out parsing PDFs has a bunch of corner cases.
-    missing_padding = len(paper_url) % 4
-    if missing_padding:
-        paper_url += '=' * (4 - missing_padding)
-    paper_url = paper_url.replace('+', '-')
-    paper_url = paper_url.replace('/', '_')
-    decoded_url = base64.urlsafe_b64decode(paper_url).decode('utf-8')
-
-    # Create a record in the database for the paper. Note: if this fail, record a compensation action.
-    record_paper_metadata(paper_title, decoded_url, paper_id)
+    # We expect URLs in base64
+    decoded_url = decode_paper_url(paper_url)
+
+    # Create a record in the database for the paper. If this fails, record a compensation action
+    paper_id = uuid.uuid4()
+    record_paper_metadata(paper_name, decoded_url, paper_id)
     compensation_actions.append(lambda: delete_paper_metadata(paper_id))

     # Download the paper and breaks it down into pages.
@@ -91,31 +103,34 @@ def upload_paper_workflow(paper_url: str, paper_title: str, paper_id: uuid.UUID)
         DBOS.logger.error(f"Failed to download or parse the paper: {e}")
         for action in compensation_actions:
             action()
-        return
+        raise e

     # Retrieve the embeddings using Together.ai and store them in our vector store
     try:
-        store_paper_embeddings(pages, paper_id)
+        store_paper_embeddings(pages, paper_name, paper_url, paper_id)
     except Exception as e:
         DBOS.logger.error(f"Failed to store the embeddings: {e}")
         for action in compensation_actions:
             action()
+        raise e

-# Record the paper metadata in the database using a DBOS Transaction. Note the usage of `DBOS.sql_session` to execute SQL queries.
-# Using this session, DBOS will automatically bundle the database queries in a transaction.
-# It will also insert metadata for this step in the same transaction, this guaranteeing extactly-once execution. + return {"name": paper_name, "url": decoded_url, "id": paper_id} + +# Record the paper metadata in the database using a DBOS Transaction. Note the usage of `DBOS.sql_session` to execute SQL queries +# Using this session, DBOS will automatically bundle the database queries in a transaction +# It will also insert metadata for this step in the same transaction to provide exactly-once execution @dbos.transaction() -def record_paper_metadata(paper_title: str, paper_url: str, paper_id: uuid.UUID): +def record_paper_metadata(paper_name: str, paper_url: str, paper_id: uuid.UUID): DBOS.sql_session.execute( papers_metadata.insert().values( uuid=paper_id, - name=paper_title, + name=paper_name, url=paper_url, ) ) - DBOS.logger.info(f"Recorded metadata for {paper_title}") + DBOS.logger.info(f"Recorded metadata for {paper_name}") -# Delete the paper metadata in the database using a DBOS Transaction. +# Delete the paper metadata in the database using a DBOS Transaction @dbos.transaction() def delete_paper_metadata(paper_id: uuid.UUID): DBOS.sql_session.execute( @@ -125,8 +140,8 @@ def delete_paper_metadata(paper_id: uuid.UUID): ) DBOS.logger.info(f"Deleted metadata for {paper_id}") -# Download the paper from the URL using a DBOS Step. This function will execute at least once. -# You can configure the retry behavior of the step. See https://docs.dbos.dev/. +# Download the paper using a DBOS Step. This function will execute at least once +# You can configure the retry behavior of the step. See https://docs.dbos.dev/ @dbos.step() def download_paper(paper_url: str) -> bytes: DBOS.logger.info(f"Downloading paper from {paper_url}") @@ -135,11 +150,10 @@ def download_paper(paper_url: str) -> bytes: raise Exception(f"Failed to download paper: {response.status_code}") return response.content -# Store the paper embeddings in the vector store using a DBOS Step. This function will execute at least once. -# This could be a DBOS transaction, but PGVector managers its own connections +# Store the paper embeddings in the vector store using a DBOS step. 
+# This function will execute at least once
 @dbos.step()
-def store_paper_embeddings(pages: List[str], paper_id: uuid.UUID):
-    # Create large enough chunks to avoid beeing rate limited by together.ai
+def store_paper_embeddings(pages: List[str], paper_name: str, paper_url: str, paper_id: uuid.UUID):
+    # Create large enough chunks to avoid rate limits from together.ai
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=3000,
         chunk_overlap=200,
@@ -148,7 +162,11 @@ def store_paper_embeddings(pages: List[str], paper_id: uuid.UUID):
     # Set the paper_id in the Document metadata
     DBOS.logger.info(f"Chunking {len(pages)} pages")
-    metadatas = [{"id": str(paper_id)} for _ in pages]
+    metadatas = [{
+        "id": str(paper_id),
+        "url": paper_url,
+        "name": paper_name,
+    } for _ in pages]
     documents = text_splitter.create_documents(pages, metadatas=metadatas)
     split_pages = text_splitter.split_documents(documents)
@@ -157,52 +175,128 @@ def store_paper_embeddings(pages: List[str], paper_id: uuid.UUID):
     vector_store.add_documents(split_pages)
     DBOS.logger.info("Fed vector store")

-######################
-#### QUERY PAPERS ####
-######################
+###################################
+#### ASK A QUESTION TO A PAPER ####
+###################################
+
+# ChatTogether lets us query Together.ai to interact with a paper
+model = ChatTogether(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+)

-# Prompt template for searching a paper.
-search_template = """ [INST]
+# We will now expose an endpoint to ask a question about a paper
+# First, let's define a prompt template for questioning a paper
+ask_paper_template = """ [INST]
     You are the author of this research paper: {context}
+
     Someone asks you the following question about the paper:
     Question: {question}
+
+    Support your answer with excerpts from the paper.
+    Excerpts should not include figure captions.
+    In the answer, make sure to include the paper name and the authors.
+
+    [/INST]
+"""
+ask_paper_prompt = ChatPromptTemplate.from_template(ask_paper_template)
+
+# Then let's declare the endpoint.
+# It is a simple one-shot operation; registering it as a DBOS workflow makes each request durable and easy to trace
+class PaperQuestion(BaseModel):
+    question: str
+    paper_name: str
+
+@app.post("/askPaper")
+@dbos.workflow()
+def ask_paper_endpoint(q: PaperQuestion):
+    # First retrieve the paper's metadata
+    paper = get_paper(q.paper_name)
+    if paper is None:
+        raise HTTPException(status_code=404, detail=f"Paper {q.paper_name} not found")
+    DBOS.logger.debug(f"Retrieved paper metadata: {paper}")
+
+    # Then ask the question to the paper
+    DBOS.logger.info(f"Asked question: '{q.question}' to paper {q.paper_name}")
+    try:
+        # Use our vector store to retrieve the paper embeddings
+        # We narrow the search to content associated with the paper's UUID
+        retriever = vector_store.as_retriever(
+            search_kwargs={'filter': {'id': str(paper.uuid), 'name': paper.name}}
+        )
+        # The chain simply invokes the model with the question and parses the output
+        chain = (
+            {"context": retriever, "question": RunnablePassthrough()}
+            | ask_paper_prompt
+            | model
+            | StrOutputParser()
+        )
+        answer = chain.invoke(q.question)
+    except Exception as e:
+        msg = f"Failed to retrieve answer from the paper: {e}"
+        DBOS.logger.error(msg)
+        raise HTTPException(status_code=500, detail=msg)
+
+    return answer
+
+########################################
+#### SEARCH FOR RELATED HN COMMENTS ####
+########################################
+
+# Now, we will expose an endpoint to search for HN comments related to a paper
+# This is a multi-agent workflow that requires durability, so we will use DBOS
+
+# Prompt template for identifying topics in a paper
+topics_search_template = """ [INST]
+    You are the author of this research paper: {context}
+
+    List the {question} most important topics addressed by the paper.
+
+    Format your answer as a list of strings of at most two words each.
     Do not add any additional information. For example:
-    1. Topic 1
-    2. Topic 2
-    3. Topic 3
+    Topic 1
+    Topic 2
+    Topic 3
+
+    Do not number items in the list.
     [/INST]
 """
-search_prompt = ChatPromptTemplate.from_template(search_template)
+topics_search_prompt = ChatPromptTemplate.from_template(topics_search_template)

-# Expose an endpoint to search for comments on a paper. The handler will invoke a DBOS workflow (search_paper_workflow) block until the workflow completes, then return its result.
+# The handler invokes a DBOS workflow, blocks until the workflow completes, then returns its result
 @app.get("/startSearch")
-def search_paper(paper_id: str):
-    with SetWorkflowUUID(str(uuid.uuid4())):
-        handle = dbos.start_workflow(search_paper_workflow, paper_id)
+def search_paper(paper_name: str):
+    DBOS.logger.info(f"Searching for comments on paper {paper_name}")
+    with SetWorkflowID(str(uuid.uuid4())):
+        handle = dbos.start_workflow(search_paper_workflow, paper_name)
     comments = handle.get_result()
     return comments

-# Register a DBOS workflow to search for comments on a paper. The workflow does three things:
-# 1. Query the paper for a list of main topics in the paper
-# 2. Search for comments on these topics on Hackernews
-# 3. Rank each topic's comment and select the most relevant one
+# The DBOS workflow. It does three things:
+# 1. Query the paper for a list of its main topics
+# 2. Search for comments on these topics on Hackernews
+# 3. Rank each topic's comments and select the most relevant one
+# Durability is important for this workflow.
If it fails, we want to resume exactly where we left off and not consume our together.ai credits @dbos.workflow() -def search_paper_workflow(paper_id: str): +def search_paper_workflow(paper_name: str): + # First retrieve the paper's metadata + paper = get_paper(paper_name) + if paper is None: + raise HTTPException(status_code=404, detail=f"Paper {paper_name} not found") + DBOS.logger.debug(f"Retrieved paper metadata: {paper}") + # Query the paper for a list of topics - retriever = vector_store.as_retriever( - filter={"id": paper_id} - ) - chain = ( - {"context": retriever, "question": RunnablePassthrough()} - | search_prompt - | model - | StrOutputParser() - ) - question = "List the 5 most meaningful topics that represent this paper's contribution." + topics_number = "5" try: - topics = chain.invoke(question).split("\n") + retriever = vector_store.as_retriever( + search_kwargs={'filter': {'id': str(paper.uuid), 'name': paper.name}} + ) + chain = ( + {"context": retriever, "question": RunnablePassthrough()} + | topics_search_prompt + | model + | StrOutputParser() + ) + topics = chain.invoke(topics_number).split("\n") except Exception as e: DBOS.logger.error(f"Failed to retrieve topics from the paper: {e}") return @@ -229,7 +323,7 @@ def search_topics(topics: List[str]) -> Dict[str, List[Dict]]: results[topic] = search_hackernews(topic, window_size_hours=730) return results -# Search for comments on a list of topics using a DBOS Step +# Search for comments on a list of topics using a DBOS step @dbos.step() def search_hackernews(topic: str, window_size_hours: int) -> List[Dict[str, str]]: threshold = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=window_size_hours) @@ -250,7 +344,7 @@ def search_hackernews(topic: str, window_size_hours: int) -> List[Dict[str, str] comment = re.sub("<[^<]+?>", "", comment) url = f"https://news.ycombinator.com/item?id={hit['objectID']}" hits.append({ - "query_topic": topic, + "topic": topic, "comment": comment, "url": url, "story_title": hit["story_title"], @@ -260,7 +354,7 @@ def search_hackernews(topic: str, window_size_hours: int) -> List[Dict[str, str] # Rank the comments using Together.ai and Salesforce Llama-Rank @dbos.step() def rank_comments(comments: Dict[str, List[Dict]]) -> Dict[str, Dict]: - results = {} + results = [] client = Together() for topic, result in comments.items(): if len(result) > 0: @@ -274,5 +368,36 @@ def rank_comments(comments: Dict[str, List[Dict]]) -> Dict[str, Dict]: DBOS.logger.info(f"Most relevant comment for topic {topic}:") DBOS.logger.info(most_relevant_comment['comment']) DBOS.logger.info(most_relevant_comment['url']) - results[topic] = most_relevant_comment + results.append(most_relevant_comment) return results + +@dbos.transaction() +def get_paper(name: str): + return DBOS.sql_session.execute( + papers_metadata.select().where(papers_metadata.c.name == name) + ).mappings().first() + +################## +#### FRONTEND #### +################## + +# In production, we recommend using DBOS primarily for the backend, with your frontend deployed elsewhere + +@app.get("/") +def frontend(): + with open(os.path.join("html", "app.html")) as file: + html = file.read() + return HTMLResponse(html) + +# Let's program an endpoint to get all papers +@app.get("/papers") +@dbos.transaction() +def get_papers(): + rows = DBOS.sql_session.execute(papers_metadata.select()) + return [dict(row) for row in rows.mappings()] + +@dbos.transaction() +def get_paper(name: str): + return DBOS.sql_session.execute( + 
papers_metadata.select().where(papers_metadata.c.name == name) + ).mappings().first() diff --git a/python/empyrical_evidences/html/app.html b/python/empyrical_evidences/html/app.html new file mode 100644 index 00000000..fc24bda8 --- /dev/null +++ b/python/empyrical_evidences/html/app.html @@ -0,0 +1,249 @@ + + + + + + Empyrical Evidences + + + + + + + + +
[The remainder of app.html is a simple single-page frontend with four sections backed by the API above: "Upload paper", "Papers ingested", "Ask a question to a paper", and "Search for Hacker News comments", each with a form and a "Response:" area. The full markup is not reproduced here.]
+ + diff --git a/python/empyrical_evidences/requirements.txt b/python/empyrical_evidences/requirements.txt index f0341a2e..3b499f71 100644 --- a/python/empyrical_evidences/requirements.txt +++ b/python/empyrical_evidences/requirements.txt @@ -1,116 +1,121 @@ -aiohappyeyeballs==2.4.0 -aiohttp==3.10.5 +aiohappyeyeballs==2.4.3 +aiohttp==3.10.9 aiosignal==1.3.1 -alembic==1.13.2 +alembic==1.13.3 annotated-types==0.7.0 -anyio==4.4.0 +anyio==4.6.0 async-timeout==4.0.3 asyncpg==0.29.0 attrs==24.2.0 -certifi==2024.7.4 -charset-normalizer==3.3.2 +certifi==2024.8.30 +charset-normalizer==3.4.0 click==8.1.7 dataclasses-json==0.6.7 -dbos==0.4.0a11 +dbos==0.8.0 Deprecated==1.2.14 dirtyjson==1.0.8 distro==1.9.0 -dnspython==2.6.1 +dnspython==2.7.0 email_validator==2.2.0 eval_type_backport==0.2.0 exceptiongroup==1.2.2 -fastapi==0.112.1 +fastapi==0.115.0 fastapi-cli==0.0.5 -filelock==3.15.4 +filelock==3.16.1 frozenlist==1.4.1 -fsspec==2024.6.1 -googleapis-common-protos==1.64.0 -greenlet==3.0.3 +fsspec==2024.9.0 +googleapis-common-protos==1.65.0 +greenlet==3.1.1 h11==0.14.0 -httpcore==1.0.5 +httpcore==1.0.6 httptools==0.6.1 -httpx==0.27.0 -idna==3.8 -importlib_metadata==8.0.0 +httpx==0.27.2 +idna==3.10 +importlib_metadata==8.4.0 Jinja2==3.1.4 -jiter==0.5.0 +jiter==0.6.1 joblib==1.4.2 jsonpatch==1.33 -jsonpickle==3.2.2 +jsonpickle==3.3.0 jsonpointer==3.0.0 jsonschema==4.23.0 -jsonschema-specifications==2023.12.1 -langchain==0.2.14 -langchain-community==0.2.12 -langchain-core==0.2.35 -langchain-openai==0.1.22 -langchain-postgres==0.0.9 -langchain-text-splitters==0.2.2 -langchain-together==0.1.5 -langsmith==0.1.104 -llama-index-core==0.10.68.post1 -llama-index-vector-stores-postgres==0.1.14 +jsonschema-specifications==2024.10.1 +langchain==0.3.3 +langchain-community==0.3.2 +langchain-core==0.3.10 +langchain-openai==0.2.2 +langchain-postgres==0.0.12 +langchain-text-splitters==0.3.0 +langchain-together==0.2.0 +langsmith==0.1.132 +llama-index-core==0.11.17 +llama-index-vector-stores-postgres==0.2.6 Mako==1.3.5 markdown-it-py==3.0.0 -MarkupSafe==2.1.5 +MarkupSafe==3.0.1 marshmallow==3.22.0 mdurl==0.1.2 -multidict==6.0.5 +multidict==6.1.0 mypy-extensions==1.0.0 nest-asyncio==1.6.0 networkx==3.2.1 nltk==3.9.1 numpy==1.26.4 -openai==1.42.0 -opentelemetry-api==1.26.0 -opentelemetry-exporter-otlp-proto-common==1.26.0 -opentelemetry-exporter-otlp-proto-http==1.26.0 -opentelemetry-proto==1.26.0 -opentelemetry-sdk==1.26.0 -opentelemetry-semantic-conventions==0.47b0 +openai==1.51.2 +opentelemetry-api==1.27.0 +opentelemetry-exporter-otlp-proto-common==1.27.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 +opentelemetry-proto==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-semantic-conventions==0.48b0 orjson==3.10.7 packaging==24.1 -pandas==2.2.2 +pandas==2.2.3 pgvector==0.2.5 pillow==10.4.0 -protobuf==4.25.4 -psycopg==3.2.1 -psycopg-pool==3.2.2 +propcache==0.2.0 +protobuf==4.25.5 +psutil==6.0.0 +psycopg==3.2.3 +psycopg-pool==3.2.3 psycopg2-binary==2.9.9 pyarrow==17.0.0 -pydantic==2.8.2 -pydantic_core==2.20.1 +pydantic==2.9.2 +pydantic-settings==2.5.2 +pydantic_core==2.23.4 Pygments==2.18.0 PyPDF2==3.0.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-multipart==0.0.9 -pytz==2024.1 +python-multipart==0.0.12 +pytz==2024.2 PyYAML==6.0.2 referencing==0.35.1 -regex==2024.7.24 +regex==2024.9.11 requests==2.32.3 -rich==13.8.0 +requests-toolbelt==1.0.0 +rich==13.9.2 rpds-py==0.20.0 shellingham==1.5.4 six==1.16.0 sniffio==1.3.1 -SQLAlchemy==2.0.32 -starlette==0.38.2 +SQLAlchemy==2.0.35 +starlette==0.38.6 tabulate==0.9.0 
tenacity==8.5.0 -tiktoken==0.7.0 -together==1.2.9 +tiktoken==0.8.0 +together==1.3.1 +tomlkit==0.13.2 tqdm==4.66.5 typer==0.12.5 typing-inspect==0.9.0 typing_extensions==4.12.2 -tzdata==2024.1 -urllib3==2.2.2 -uvicorn==0.30.6 +tzdata==2024.2 +urllib3==2.2.3 +uvicorn==0.31.1 uvloop==0.20.0 -watchfiles==0.23.0 -websockets==13.0 +watchfiles==0.24.0 +websockets==13.1 wrapt==1.16.0 -yarl==1.9.4 -zipp==3.20.1 +yarl==1.14.0 +zipp==3.20.2
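
Note: the new `main.py` imports `decode_paper_url` from a `utils` module that is not part of this diff. Based on the inline base64-decoding logic it replaces in `upload_paper_workflow`, a minimal sketch of that helper might look like the following (the module layout and exact behavior are assumptions):

```python
# utils.py -- hypothetical sketch; the real module is not included in this diff.
# It mirrors the inline decoding removed from upload_paper_workflow: restore
# padding, normalize to the URL-safe base64 alphabet, then decode to a string.
import base64


def decode_paper_url(paper_url: str) -> str:
    # Restore any padding stripped when the URL was passed as a query parameter
    missing_padding = len(paper_url) % 4
    if missing_padding:
        paper_url += "=" * (4 - missing_padding)
    # Normalize to the URL-safe base64 alphabet before decoding
    paper_url = paper_url.replace("+", "-").replace("/", "_")
    return base64.urlsafe_b64decode(paper_url).decode("utf-8")
```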
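
Similarly, `main.py` imports `papers_metadata` from a `schema` module not shown in this diff. From how the table is used (inserts with `uuid`, `name`, and `url`; lookups by `uuid` and by `name`), a hypothetical SQLAlchemy definition could be:

```python
# schema.py -- hypothetical sketch; column names and types are inferred from
# how papers_metadata is used in main.py, not taken from the real module.
import uuid

from sqlalchemy import Column, MetaData, String, Table
from sqlalchemy.dialects.postgresql import UUID

metadata = MetaData()

papers_metadata = Table(
    "papers_metadata",
    metadata,
    Column("uuid", UUID(as_uuid=True), primary_key=True, default=uuid.uuid4),
    Column("name", String, nullable=False),
    Column("url", String, nullable=False),
)
```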