add missing dependencies, upgrade app to include complete data structure
lfunderburk committed Nov 26, 2024
1 parent 88e3486 commit f73defa
Showing 8 changed files with 349 additions and 58 deletions.
5 changes: 2 additions & 3 deletions ch7/api-dockerization/app.py
@@ -65,13 +65,12 @@ def run_bytewax(symbol)

     # Use the pipeline to query
     try:
-        response = query_pipeline.run({"text_embedder": {"text": request.question}, "prompt_builder": {"question": request.question}})
-        return {"answer": response["llm"]["replies"][0]}
+        response = query_pipeline.run(request.question, symbols)
+        return {"answer": response}
     except Exception as e:
         logger.error(f"Error querying the pipeline: {e}")
         raise HTTPException(status_code=500, detail="Error querying the pipeline.")
-

 @app.get("/")
 def health_check():
     """Health check endpoint."""
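The endpoint now returns the raw pipeline output instead of a generated reply, so callers can no longer index into response["llm"]["replies"]. A minimal sketch of how a caller might unwrap the new shape, assuming Haystack 2.x (where the ExtractiveReader wired up in querying.py below reports under the "reader" key); extract_answer is a hypothetical helper, not part of this commit:

```python
def extract_answer(response: dict) -> str:
    # ExtractiveReader returns ExtractedAnswer objects under "answers";
    # .data is None for the implicit "no answer" candidate, so fall back
    # to an empty string.
    answers = response.get("reader", {}).get("answers", [])
    return (answers[0].data or "") if answers else ""
```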
10 changes: 5 additions & 5 deletions ch7/api-dockerization/indexing_dataflow.py
@@ -88,8 +88,9 @@ def run(self, sources: Dict[str, Any]) -> None:

             # Create a Document with the cleaned content and metadata
             content = source['content']
-            document = Document(content=content, meta=source)
+            document = Document(content=content, meta={"symbols": source.get("symbols", ""), **source})
             documents.append(document)
+            logger.info(f"DOCUMENT {document}")

         logger.info(f"Successfully processed {len(documents)} documents.")

@@ -114,7 +115,7 @@ def clean_text(self, text):


 @component
-class BenzingaEmbeder:
+class BenzingaEmbedder:

     def __init__(self, document_store, open_ai_key):
         logger.info("Initializing BenzingaEmbeder pipeline.")
@@ -151,6 +152,7 @@ def __init__(self, document_store, open_ai_key):
     def run(self, event: List[Union[str, Path, ByteStream]]):
         logger.info(f"Running BenzingaEmbeder with event: {event}")
         try:
+
             documents = self.pipeline.run({"get_news": {"sources": [event]}})
             self.pipeline.draw("benzinga_pipeline.png")
             logger.info("Pipeline executed successfully, drawing pipeline graph.")
@@ -164,10 +166,8 @@ def filter_data(event, symbol):
     """Filter the data based on the symbol."""
     return event and "symbols" in event and symbol in event["symbols"]
-
-
 # Modified flow to include symbol filtering
 def run_pipeline_with_symbol(symbol, document_store, open_ai_key):
-    embed_benzinga = BenzingaEmbeder(document_store, open_ai_key)
+    embed_benzinga = BenzingaEmbedder(document_store, open_ai_key)

     def process_event(event):
         """Wrapper to handle the processing of each event."""
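For reference, filter_data guards against missing events and missing "symbols" keys before testing membership. A quick sketch with illustrative events:

```python
def filter_data(event, symbol):
    """Filter the data based on the symbol."""
    return event and "symbols" in event and symbol in event["symbols"]

assert filter_data({"symbols": ["TSLA", "NVDA"]}, "TSLA")
assert not filter_data({"symbols": ["AAPL"]}, "TSLA")
assert not filter_data(None, "TSLA")                 # event missing entirely
assert not filter_data({"content": "..."}, "TSLA")   # no symbols key
```

Note that if symbols arrives as a comma-separated string rather than a list, `in` degrades to substring matching, so a short ticker like "A" would also match inside "TSLA".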
65 changes: 32 additions & 33 deletions ch7/api-dockerization/querying.py
@@ -5,53 +5,52 @@
 from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
+from haystack.components.readers import ExtractiveReader

 from dotenv import load_dotenv
 import os
 import wandb
 import time
 import logging

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 load_dotenv(".env")
 open_ai_key = os.environ.get("OPENAI_API_KEY")

 from haystack import Pipeline
 from haystack.components.embedders import OpenAITextEmbedder
 from haystack.utils import Secret
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator

 class RetrieveDocuments:

     def __init__(self, doc_store, open_ai_key):

-        # Initialize a text embedder to create an embedding for the user query.
+        # Initialize components
         text_embedder = OpenAITextEmbedder(api_key=Secret.from_token(open_ai_key))

         # Initialize retriever
         retriever = InMemoryEmbeddingRetriever(document_store=doc_store)
+        reader = ExtractiveReader()
+        reader.warm_up()
+        # Build the pipeline
+        self.query_pipeline = Pipeline()
+        self.query_pipeline.add_component("embedder",text_embedder)
+        self.query_pipeline.add_component("retriever", retriever)
+        self.query_pipeline.add_component("reader", reader)

-        # Define the template prompt
-        template = """
-        Given the following information, answer the question.
-        Context:
-        {% for document in documents %}
-            {{ document.content }}
-        {% endfor %}
-        Question: {{question}}
-        Answer:
-        """
-        prompt_builder = PromptBuilder(template=template)
+        # Connect components
+        self.query_pipeline.connect("embedder.embedding", "retriever.query_embedding")
+        self.query_pipeline.connect("retriever.documents", "reader.documents")

-        # Initialize Generator (Replace 'your-api-key' with your OpenAI API Key)
-        generator = OpenAIGenerator(model="gpt-4o-mini")
-        generator.api_key = open_ai_key
+    def run(self, query, symbols):

+        logger.info(f"Running query pipeline with query: {query}")

-        # Build the Pipeline
-        self.query_pipeline = Pipeline()
-        self.query_pipeline.add_component("text_embedder", text_embedder)
-        self.query_pipeline.add_component("retriever", retriever)
-        self.query_pipeline.add_component("prompt_builder", prompt_builder)
-        self.query_pipeline.add_component("llm", generator)
-        self.query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
-        self.query_pipeline.connect("retriever", "prompt_builder.documents")
-        self.query_pipeline.connect("prompt_builder", "llm")

-    def run(self, query):
-        return self.query_pipeline.run(query)
+        # Pass query through the pipeline
+        response = self.query_pipeline.run(
+            data={"embedder": {"text": query},
+                  "retriever": {"top_k": 3},
+                  "reader": {"query": query, "top_k": 2}}
+        )
+        logger.info(f"Response: {response}")
+        return response #["llm"]["replies"][0]


 # query_pipeline = RetrieveDocuments()
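Taken together, RetrieveDocuments now runs an embed-retrieve-read pipeline instead of prompting a generator. A minimal usage sketch, assuming Haystack 2.x, an OPENAI_API_KEY in the environment, and that querying.py is importable; the sample document is illustrative:

```python
import os
from haystack import Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import OpenAIDocumentEmbedder
from querying import RetrieveDocuments  # the class diffed above

doc_store = InMemoryDocumentStore()
docs = [Document(content="Tesla shares rose 4% after earnings.", meta={"symbols": "TSLA"})]
# Documents need embeddings before InMemoryEmbeddingRetriever can match them.
doc_store.write_documents(OpenAIDocumentEmbedder().run(documents=docs)["documents"])

rd = RetrieveDocuments(doc_store, os.environ["OPENAI_API_KEY"])
response = rd.run("What happened to Tesla shares?", ["TSLA"])
print(response["reader"]["answers"][0].data)
```

Note that run accepts symbols but does not yet use it inside the method; symbol filtering still happens upstream in the indexing dataflow.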
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -34,7 +34,8 @@ dependencies = [
     "bytewax>=0.21",
     "bytewax-redis",
     "fastapi>=0.115",
-    "uvicorn"
+    "uvicorn",
+    "transformers[torch,sentencepiece]"
 ]

 [build-system]
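The new transformers[torch,sentencepiece] extra is what backs the ExtractiveReader introduced in querying.py, and it is what pulls accelerate, sentencepiece, and the extra torch edges into the lock file below. A quick import sanity check (a sketch; the first warm_up() downloads the reader's default model from the Hugging Face Hub):

```python
import sentencepiece  # provided by the sentencepiece extra
import torch          # provided by the torch extra
from haystack.components.readers import ExtractiveReader

reader = ExtractiveReader()  # loads its default QA model via transformers
reader.warm_up()
```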
81 changes: 77 additions & 4 deletions requirements/lib-py3.10.txt
@@ -1,5 +1,9 @@
# This file was autogenerated by uv via the following command:
# uv pip compile --generate-hashes -p 3.10 --all-extras pyproject.toml -o requirements/lib-py3.10.txt
+accelerate==1.1.1 \
+--hash=sha256:0d39dfac557052bc735eb2703a0e87742879e1e40b88af8a2f9a93233d4cd7db \
+--hash=sha256:61edd81762131b8d4bede008643fa1e1f3bf59bec710ebda9771443e24feae02
+# via transformers
aiohappyeyeballs==2.4.3 \
--hash=sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586 \
--hash=sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572
@@ -649,6 +653,7 @@ huggingface-hub==0.26.2 \
--hash=sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46 \
--hash=sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b
# via
+# accelerate
# datasets
# sentence-transformers
# tokenizers
@@ -1221,6 +1226,7 @@ numpy==1.26.4 \
--hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \
--hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f
# via
+# accelerate
# datasets
# haystack-ai
# langchain
@@ -1301,6 +1307,7 @@ packaging==24.2 \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
# via
+# accelerate
# datasets
# huggingface-hub
# ipykernel
@@ -1570,7 +1577,9 @@ protobuf==5.28.3 \
--hash=sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868 \
--hash=sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687 \
--hash=sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed
-# via wandb
+# via
+# transformers
+# wandb
psutil==6.1.0 \
--hash=sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047 \
--hash=sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc \
@@ -1590,6 +1599,7 @@ psutil==6.1.0 \
--hash=sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a \
--hash=sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0
# via
+# accelerate
# ipykernel
# wandb
ptyprocess==0.7.0 \
@@ -1845,6 +1855,7 @@ pyyaml==6.0.2 \
--hash=sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12 \
--hash=sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4
# via
+# accelerate
# datasets
# haystack-ai
# huggingface-hub
@@ -2207,7 +2218,9 @@ safetensors==0.4.5 \
--hash=sha256:f68bf99ea970960a237f416ea394e266e0361895753df06e3e06e6ea7907d98b \
--hash=sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed \
--hash=sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410
-# via transformers
+# via
+# accelerate
+# transformers
scikit-learn==1.5.2 \
--hash=sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445 \
--hash=sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3 \
@@ -2277,6 +2290,61 @@ sentence-transformers==3.3.1 \
--hash=sha256:9635dbfb11c6b01d036b9cfcee29f7716ab64cf2407ad9f403a2e607da2ac48b \
--hash=sha256:abffcc79dab37b7d18d21a26d5914223dd42239cfe18cb5e111c66c54b658ae7
# via rag-with-haystack (pyproject.toml)
sentencepiece==0.2.0 \
--hash=sha256:0461324897735512a32d222e3d886e24ad6a499761952b6bda2a9ee6e4313ea5 \
--hash=sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36 \
--hash=sha256:0a91aaa3c769b52440df56fafda683b3aa48e3f2169cf7ee5b8c8454a7f3ae9b \
--hash=sha256:0f67eae0dbe6f2d7d6ba50a354623d787c99965f068b81e145d53240198021b0 \
--hash=sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040 \
--hash=sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c \
--hash=sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227 \
--hash=sha256:1e0f9c4d0a6b0af59b613175f019916e28ade076e21242fd5be24340d8a2f64a \
--hash=sha256:20813a68d4c221b1849c62c30e1281ea81687894d894b8d4a0f4677d9311e0f5 \
--hash=sha256:22e37bac44dd6603388cb598c64ff7a76e41ca774646f21c23aadfbf5a2228ab \
--hash=sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb \
--hash=sha256:298f21cc1366eb60311aedba3169d30f885c363ddbf44214b0a587d2908141ad \
--hash=sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08 \
--hash=sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a \
--hash=sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f \
--hash=sha256:38aed822fb76435fa1f12185f10465a94ab9e51d5e8a9159e9a540ce926f0ffd \
--hash=sha256:3f1ec95aa1e5dab11f37ac7eff190493fd87770f7a8b81ebc9dd768d1a3c8704 \
--hash=sha256:4547683f330289ec4f093027bfeb87f9ef023b2eb6f879fdc4a8187c7e0ffb90 \
--hash=sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e \
--hash=sha256:536b934e244829e3fe6c4f198652cd82da48adb9aa145c9f00889542726dee3d \
--hash=sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7 \
--hash=sha256:6cf333625234f247ab357b0bd9836638405ea9082e1543d5b8408f014979dcbf \
--hash=sha256:7140d9e5a74a0908493bb4a13f1f16a401297bd755ada4c707e842fbf6f0f5bf \
--hash=sha256:787e480ca4c1d08c9985a7eb1eae4345c107729c99e9b5a9a00f2575fc7d4b4b \
--hash=sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f \
--hash=sha256:7b06b70af54daa4b4904cbb90b4eb6d35c9f3252fdc86c9c32d5afd4d30118d8 \
--hash=sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e \
--hash=sha256:7cd6175f7eaec7142d2bf6f6597ce7db4c9ac89acf93fcdb17410c3a8b781eeb \
--hash=sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6 \
--hash=sha256:859ba1acde782609a0910a26a60e16c191a82bf39b5621107552c0cd79fad00f \
--hash=sha256:89f65f69636b7e9c015b79dff9c9985a9bc7d19ded6f79ef9f1ec920fdd73ecf \
--hash=sha256:926ef920ae2e8182db31d3f5d081ada57804e3e1d3a8c4ef8b117f9d9fb5a945 \
--hash=sha256:98501e075f35dd1a1d5a20f65be26839fcb1938752ec61539af008a5aa6f510b \
--hash=sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d \
--hash=sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843 \
--hash=sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553 \
--hash=sha256:b99a308a2e5e569031ab164b74e6fab0b6f37dfb493c32f7816225f4d411a6dd \
--hash=sha256:bcbbef6cc277f8f18f36959e305f10b1c620442d75addc79c21d7073ae581b50 \
--hash=sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452 \
--hash=sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75 \
--hash=sha256:cdb701eec783d3ec86b7cd4c763adad8eaf6b46db37ee1c36e5e6c44b3fe1b5f \
--hash=sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c \
--hash=sha256:d1e5ca43013e8935f25457a4fca47e315780172c3e821b4b13a890668911c792 \
--hash=sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2 \
--hash=sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3 \
--hash=sha256:d8cf876516548b5a1d6ac4745d8b554f5c07891d55da557925e5c13ff0b4e6ad \
--hash=sha256:e3d1d2cc4882e8d6a1adf9d5927d7716f80617fc693385661caff21888972269 \
--hash=sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d \
--hash=sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2 \
--hash=sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109 \
--hash=sha256:f4d158189eb2ecffea3a51edf6d25e110b3678ec47f1a40f2d541eafbd8f6250 \
--hash=sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251 \
--hash=sha256:ff88712338b01031910e8e61e7239aff3ce8869ee31a47df63cb38aadd591bea
# via transformers
sentry-sdk==2.19.0 \
--hash=sha256:7b0b3b709dee051337244a09a30dbf6e95afe0d34a1f8b430d45e0982a7c125b \
--hash=sha256:ee4a4d2ae8bfe3cac012dcf3e4607975904c137e1738116549fc3dbbb6ff0e36
@@ -2643,7 +2711,10 @@ torch==2.5.1 \
--hash=sha256:9b61edf3b4f6e3b0e0adda8b3960266b9009d02b37555971f4d1c8f7a05afed7 \
--hash=sha256:de5b7d6740c4b636ef4db92be922f0edc425b65ed78c5076c43c42d362a45457 \
--hash=sha256:ed231a4b3a5952177fafb661213d690a72caaad97d5824dd4fc17ab9e15cec03
-# via sentence-transformers
+# via
+# accelerate
+# sentence-transformers
+# transformers
tornado==6.4.2 \
--hash=sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803 \
--hash=sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec \
@@ -2686,7 +2757,9 @@ traitlets==5.14.3 \
transformers==4.46.3 \
--hash=sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc \
--hash=sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef
-# via sentence-transformers
+# via
+# rag-with-haystack (pyproject.toml)
+# sentence-transformers
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8