From 6b097928fff311ca4f0c9664e0a77218f73a2378 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 16 Oct 2023 17:02:24 +0100 Subject: [PATCH] chore: telemetry for rankers, readers, retrievers, writers (#6075) * add telemetry to pipelines 2.0 * only collect data if telemetry is on * reno * add downsampling * typing * manual tests * pylint * simplify code * Update haystack/preview/telemetry/__init__.py * look for _telemetry_data * rather index by component type * black * mypy * error handling * comment * review feedback & small improvements * defaultdict * stray changes * try-catch * method instead of attribute * fixes * remove print statements * lint * invert condition * always send the first event of the day * collect specs * track 2nd and 3rd events too * send first event and then max 1 event a minute * rename constant * black * add test * add telemetry for rankers readers and retrievers * get only the type of docstore, not the whole object --- haystack/preview/components/rankers/similarity.py | 6 ++++++ haystack/preview/components/readers/extractive.py | 6 ++++++ .../preview/components/retrievers/memory_bm25_retriever.py | 6 ++++++ .../components/retrievers/memory_embedding_retriever.py | 6 ++++++ haystack/preview/components/writers/document_writer.py | 6 ++++++ 5 files changed, 30 insertions(+) diff --git a/haystack/preview/components/rankers/similarity.py b/haystack/preview/components/rankers/similarity.py index 66f620294b..5d5125006a 100644 --- a/haystack/preview/components/rankers/similarity.py +++ b/haystack/preview/components/rankers/similarity.py @@ -56,6 +56,12 @@ def __init__( self.model = None self.tokenizer = None + def _get_telemetry_data(self) -> Dict[str, Any]: + """ + Data that is sent to Posthog for usage analytics. + """ + return {"model": str(self.model_name_or_path)} + def warm_up(self): """ Warm up the model and tokenizer used in scoring the documents. diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py index 081646dd26..be18b56988 100644 --- a/haystack/preview/components/readers/extractive.py +++ b/haystack/preview/components/readers/extractive.py @@ -67,6 +67,12 @@ def __init__( self.no_answer = no_answer self.calibration_factor = calibration_factor + def _get_telemetry_data(self) -> Dict[str, Any]: + """ + Data that is sent to Posthog for usage analytics. + """ + return {"model": self.model_name_or_path} + def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/haystack/preview/components/retrievers/memory_bm25_retriever.py b/haystack/preview/components/retrievers/memory_bm25_retriever.py index 5ae1224e63..564f2fa087 100644 --- a/haystack/preview/components/retrievers/memory_bm25_retriever.py +++ b/haystack/preview/components/retrievers/memory_bm25_retriever.py @@ -41,6 +41,12 @@ def __init__( self.top_k = top_k self.scale_score = scale_score + def _get_telemetry_data(self) -> Dict[str, Any]: + """ + Data that is sent to Posthog for usage analytics. + """ + return {"document_store": type(self.document_store).__name__} + def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/haystack/preview/components/retrievers/memory_embedding_retriever.py b/haystack/preview/components/retrievers/memory_embedding_retriever.py index 1d5f1cea2b..17a709fdf2 100644 --- a/haystack/preview/components/retrievers/memory_embedding_retriever.py +++ b/haystack/preview/components/retrievers/memory_embedding_retriever.py @@ -44,6 +44,12 @@ def __init__( self.scale_score = scale_score self.return_embedding = return_embedding + def _get_telemetry_data(self) -> Dict[str, Any]: + """ + Data that is sent to Posthog for usage analytics. + """ + return {"document_store": type(self.document_store).__name__} + def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. diff --git a/haystack/preview/components/writers/document_writer.py b/haystack/preview/components/writers/document_writer.py index 5ce8c9d4c2..5b3d2f4acb 100644 --- a/haystack/preview/components/writers/document_writer.py +++ b/haystack/preview/components/writers/document_writer.py @@ -19,6 +19,12 @@ def __init__(self, document_store: DocumentStore, policy: DuplicatePolicy = Dupl self.document_store = document_store self.policy = policy + def _get_telemetry_data(self) -> Dict[str, Any]: + """ + Data that is sent to Posthog for usage analytics. + """ + return {"document_store": type(self.document_store).__name__} + def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary.