Skip to content

Commit

Permalink
chore: telemetry for rankers, readers, retrievers, writers (#6075)
Browse files Browse the repository at this point in the history
* add telemetry to pipelines 2.0

* only collect data if telemetry is on

* reno

* add downsampling

* typing

* manual tests

* pylint

* simplify code

* Update haystack/preview/telemetry/__init__.py

* look for _telemetry_data

* rather index by component type

* black

* mypy

* error handling

* comment

* review feedback & small improvements

* defaultdict

* stray changes

* try-catch

* method instead of attribute

* fixes

* remove print statements

* lint

* invert condition

* always send the first event of the day

* collect specs

* track 2nd and 3rd events too

* send first event and then max 1 event a minute

* rename constant

* black

* add test

* add telemetry for rankers readers and retrievers

* get only the type of docstore, not the whole object
  • Loading branch information
ZanSara authored Oct 16, 2023
1 parent 490de4e commit 6b09792
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 0 deletions.
6 changes: 6 additions & 0 deletions haystack/preview/components/rankers/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ def __init__(
self.model = None
self.tokenizer = None

def _get_telemetry_data(self) -> Dict[str, Any]:
    """
    Build the payload that is sent to Posthog for usage analytics.

    :return: A dictionary with a single ``"model"`` entry holding the string
        form of ``self.model_name_or_path``.
    """
    model_identifier = str(self.model_name_or_path)
    telemetry: Dict[str, Any] = {"model": model_identifier}
    return telemetry

def warm_up(self):
"""
Warm up the model and tokenizer used in scoring the documents.
Expand Down
6 changes: 6 additions & 0 deletions haystack/preview/components/readers/extractive.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def __init__(
self.no_answer = no_answer
self.calibration_factor = calibration_factor

def _get_telemetry_data(self) -> Dict[str, Any]:
    """
    Data that is sent to Posthog for usage analytics.

    :return: A dictionary with a single ``"model"`` entry holding the string
        form of ``self.model_name_or_path``.
    """
    # Normalise to ``str`` so the payload always carries a plain string,
    # matching the ranker's telemetry. NOTE(review): presumably
    # ``model_name_or_path`` may be a path-like object - confirm in __init__.
    return {"model": str(self.model_name_or_path)}

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ def __init__(
self.top_k = top_k
self.scale_score = scale_score

def _get_telemetry_data(self) -> Dict[str, Any]:
    """
    Build the payload that is sent to Posthog for usage analytics.

    Only the class name of the document store is reported, not the store
    object itself.

    :return: A dictionary with a single ``"document_store"`` entry.
    """
    store_class = type(self.document_store)
    telemetry: Dict[str, Any] = {"document_store": store_class.__name__}
    return telemetry

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ def __init__(
self.scale_score = scale_score
self.return_embedding = return_embedding

def _get_telemetry_data(self) -> Dict[str, Any]:
    """
    Assemble the usage-analytics data that is sent to Posthog.

    The document store is identified by the name of its type only; the
    store instance is never included in the payload.

    :return: A dictionary with a single ``"document_store"`` entry.
    """
    document_store_type_name = type(self.document_store).__name__
    return dict(document_store=document_store_type_name)

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Expand Down
6 changes: 6 additions & 0 deletions haystack/preview/components/writers/document_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ def __init__(self, document_store: DocumentStore, policy: DuplicatePolicy = Dupl
self.document_store = document_store
self.policy = policy

def _get_telemetry_data(self) -> Dict[str, Any]:
    """
    Produce the usage-analytics data that is sent to Posthog.

    Reports the type name of ``self.document_store`` rather than the store
    object.

    :return: A dictionary with a single ``"document_store"`` entry.
    """
    payload: Dict[str, Any] = {}
    payload["document_store"] = type(self.document_store).__name__
    return payload

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Expand Down

0 comments on commit 6b09792

Please sign in to comment.