-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[migration2viatot2.0]: Migrated Supermetrics #671
Changes from 1 commit
af8444a
8c49474
e0674b2
184df69
2327d02
a9112aa
98a8611
43a8810
0fcd640
2f95c74
72fd8db
a35167d
76b5404
0a1e722
c1bd962
9539c03
7b773f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from viadot.config import get_source_credentials | ||
from viadot.sources import Supermetrics | ||
|
||
|
||
def test_connection():
    """Check that a minimal Google Ads query against the live Supermetrics API returns data.

    The query is capped at a single row (`max_rows=1`) to keep the call cheap,
    so exactly one row is expected in the resulting DataFrame.
    """
    credentials = get_source_credentials("supermetrics")
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    df = Supermetrics().query(google_ads_params).to_df()

    # Count rows directly: `df.count()[0]` counts non-null cells of the first
    # column and indexes a label-indexed Series by position, which pandas deprecates.
    assert len(df) == 1
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import pytest | ||
|
||
from viadot.sources import Supermetrics | ||
|
||
def _field(field_id, name, kind="dim", split="row"):
    """Build one entry of the `meta.query.fields` list of a mocked API response."""
    return {
        "id": field_id,
        "field_id": field_id,
        "field_name": name,
        "field_type": kind,
        "field_split": split,
    }


# Mocked response in which the "segment" field is split by column, so the
# real column names are carried by the first row of "data".
RESPONSE_PIVOTED = {
    "meta": {
        "query": {
            "fields": [
                _field("Date", "Date"),
                _field("profile", "View"),
                _field("segment", "Segment", split="column"),
                _field("Sessions", "Sessions", kind="met"),
            ]
        },
        "result": {"total_columns": 6, "total_rows": 700},
    },
    "data": [
        [
            "Date",
            "View",
            "M-site_TOTAL: Bounces Landing",
            "M-site_TOTAL: Click to EDSP",
            "M-site_TOTAL: MQL Conversion Page Sessions",
            "M-site_TOTAL: Click to RWS",
        ],
        ["2020-01-01", "REDACTED", 123, 456, 78, 9],
    ],
}

# Mocked response with a column-split field but no rows at all in "data".
RESPONSE_PIVOTED_NO_DATA = {
    "meta": {
        "query": {
            "fields": [
                _field("Date", "Date"),
                _field("profileID", "View ID"),
                _field("Hostname", "Hostname"),
                _field("profile", "View"),
                _field("segment", "Segment", split="column"),
                _field("Sessions", "Sessions", kind="met"),
            ]
        },
        "result": {"total_columns": 0, "total_rows": 0},
    },
    "data": [],
}
|
||
|
||
def test___get_col_names_google_analytics_pivoted():
    """Column names of a pivoted response must come from the first data row."""
    expected = [
        "Date",
        "View",
        "M-site_TOTAL: Bounces Landing",
        "M-site_TOTAL: Click to EDSP",
        "M-site_TOTAL: MQL Conversion Page Sessions",
        "M-site_TOTAL: Click to RWS",
    ]
    actual = Supermetrics._get_col_names_google_analytics(response=RESPONSE_PIVOTED)
    assert actual == expected
|
||
|
||
def test___get_col_names_google_analytics_pivoted_no_data():
    """A pivoted response without any data rows must raise `ValueError`."""
    get_col_names = Supermetrics._get_col_names_google_analytics
    with pytest.raises(ValueError):
        get_col_names(response=RESPONSE_PIVOTED_NO_DATA)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,7 +64,8 @@ def get_source_config(key, config=CONFIG): | |
if source_configs is not None: | ||
for source_config in source_configs: | ||
if key in source_config.keys(): | ||
return source_configs[source_configs.index(source_config)][key] | ||
# return source_configs[source_configs.index(source_config)][key] OBS!!!!!!!!!!!!! | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add this to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also add/fix a test (in |
||
return source_configs[source_configs.index(source_config)] | ||
|
||
|
||
def get_source_credentials(key, config=CONFIG): | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,164 @@ | ||||
import json | ||||
import urllib | ||||
from copy import deepcopy | ||||
from typing import Any, Dict, List | ||||
|
||||
import numpy as np | ||||
import pandas as pd | ||||
|
||||
from ..config import get_source_credentials | ||||
from ..exceptions import CredentialError | ||||
from ..utils import handle_api_response | ||||
from .base import Source | ||||
|
||||
|
||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add credentials definitions using Pydantic, as in the example below: viadot/viadot/sources/databricks.py, line 22 in 9239fcb
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||
class Supermetrics(Source):
    """A class implementing the Supermetrics API.

    Documentation for this API is located at:
    https://supermetrics.com/docs/product-api-getting-started/
    Usage limits: https://supermetrics.com/docs/product-api-usage-limits/

    Args:
        query_params (Dict[str, Any], optional): The parameters to pass to the GET
            query. See https://supermetrics.com/docs/product-api-get-data/ for the
            full specification. Defaults to None.
        credentials (Dict[str, Any], optional): Credentials holding the `api_key`
            entry. When the keyword is omitted, the credentials are read with
            `get_source_credentials("supermetrics")`.
    """

    API_ENDPOINT = "https://api.supermetrics.com/enterprise/v2/query/data/json"

    def __init__(self, *args, query_params: Dict[str, Any] = None, **kwargs):
        # Read the config only when the caller did not pass credentials at all;
        # an explicitly passed value (even None) is forwarded unchanged.
        if "credentials" in kwargs:
            credentials = kwargs.pop("credentials")
        else:
            credentials = get_source_credentials("supermetrics")

        super().__init__(*args, credentials=credentials, **kwargs)

        if not self.credentials:
            self.logger.debug(
                "Credentials not specified. Requests to the Supermetrics API will not be authorized."
            )

        self.query_params = query_params

    @classmethod
    def get_params_from_api_query(cls, url: str) -> Dict[str, Any]:
        """Extract the query parameters embedded in a Supermetrics API query URL.

        The value of the first query-string parameter of the URL is expected
        to be a JSON document holding the query parameters.

        Args:
            url (str): The API query URL (or just its query string).

        Returns:
            Dict[str, Any]: The decoded query parameters.

        Raises:
            ValueError: If the URL carries no query parameters or the first
                parameter is not valid JSON.
        """
        # Imported locally: a bare `import urllib` does not guarantee that the
        # `urllib.parse` submodule is loaded.
        from urllib.parse import parse_qs

        _, has_query, query_string = url.partition("?")
        if not has_query:
            # No "?" present: treat the whole input as the query string.
            query_string = url

        # `parse_qs` percent-decodes on its own. Decoding the URL beforehand
        # would decode twice and corrupt values containing encoded "&", "=" or "+".
        parsed = parse_qs(query_string)
        if not parsed:
            raise ValueError("No query parameters found in the provided URL.")

        first_param_values = list(parsed.values())[0]
        return json.loads(first_param_values[0])

    @classmethod
    def from_url(cls, url: str, credentials: Dict[str, Any] = None):
        """Create an instance whose query parameters are taken from an API query URL.

        Args:
            url (str): The API query URL to read the parameters from.
            credentials (Dict[str, Any], optional): Credentials to use. When empty,
                `get_source_credentials("supermetrics")` is used. Defaults to None.

        Returns:
            Supermetrics: An instance with `query_params` set from the URL.
        """
        # `cls` rather than the hard-coded class, so subclasses get their own type.
        return cls(
            credentials=credentials or get_source_credentials("supermetrics"),
            query_params=cls.get_params_from_api_query(url),
        )

    def to_json(self, timeout=(3.05, 60 * 30)) -> Dict[str, Any]:
        """Download query results to a dictionary.

        Note that Supermetrics API will sometimes hang and not return any error
        message, so we're adding a timeout to GET.

        See [requests docs](https://docs.python-requests.org/en/master/user/advanced/#timeouts)
        for an explanation of why this timeout value will work on long-running
        queries but fail fast on connection issues.

        Args:
            timeout (tuple, optional): The timeout passed along with the GET
                request. Defaults to (3.05, 60 * 30).

        Returns:
            Dict[str, Any]: The decoded JSON body of the API response.

        Raises:
            ValueError: If the query parameters have not been set.
            CredentialError: If no API key is available in the credentials or
                in the query parameters.
        """
        if not self.query_params:
            raise ValueError("Please build the query first")

        # Prefer the key from the credentials; fall back to one embedded in the
        # query itself (e.g. parsed from a URL) instead of sending "Bearer None".
        api_key = (self.credentials or {}).get("api_key") or self.query_params.get(
            "api_key"
        )
        if not api_key:
            raise CredentialError(
                "Supermetrics API key ('api_key') was not found in the credentials."
            )

        params = {"json": json.dumps(self.query_params)}
        headers = {"Authorization": f"Bearer {api_key}"}

        response = handle_api_response(
            url=self.API_ENDPOINT, params=params, headers=headers, timeout=timeout
        )
        return response.json()

    @classmethod
    def _get_col_names_google_analytics(
        cls,
        response: dict,
    ) -> List[str]:
        """Get the column names from a Google Analytics response.

        Args:
            response (dict): The decoded API response; its
                `meta.query.fields` list and `data` list are read.

        Returns:
            List[str]: The column names.

        Raises:
            ValueError: If the data is pivoted and the response has no data rows.
        """
        fields_meta = response["meta"]["query"]["fields"]

        # Supermetrics allows pivoting GA data, in which case it generates
        # additional columns, which are not listed in the response's query
        # metadata but are instead added as the first row of data.
        is_pivoted = any(field["field_split"] == "column" for field in fields_meta)

        if not is_pivoted:
            # Non-pivoted data; query fields match result fields.
            return [field["field_name"] for field in fields_meta]

        if not response["data"]:
            raise ValueError("Couldn't find column names as query returned no data.")
        return response["data"][0]

    @classmethod
    def _get_col_names_other(cls, response: dict) -> List[str]:
        """Get the column names from a response of any non-Google Analytics source.

        For Google Analytics, use `_get_col_names_google_analytics()` instead.

        Args:
            response (dict): The decoded API response; its
                `meta.query.fields` list is read.

        Returns:
            List[str]: The `field_name` of every queried field.
        """
        return [field["field_name"] for field in response["meta"]["query"]["fields"]]

    def _get_col_names(self) -> List[str]:
        """Get the column names of the current query.

        Runs a copy of the query with `offset_start` and `offset_end` set to 0
        and reads the column names from that response.

        Returns:
            List[str]: The column names.

        Raises:
            ValueError: If the query parameters have not been set, or if the
                names cannot be determined from the response.
        """
        if not self.query_params:
            raise ValueError("Please build the query first")

        probe_params = deepcopy(self.query_params)
        probe_params["offset_start"] = 0
        probe_params["offset_end"] = 0

        # Reuse this instance's credentials so that a key supplied by the caller
        # is also used for the probe (instead of the config defaults).
        probe = Supermetrics(credentials=self.credentials, query_params=probe_params)
        response: dict = probe.to_json()

        if self.query_params["ds_id"] == "GA":
            return Supermetrics._get_col_names_google_analytics(response)
        return Supermetrics._get_col_names_other(response)

    def to_df(self, if_empty: str = "warn") -> pd.DataFrame:
        """Download data into a pandas DataFrame.

        Note that Supermetrics can calculate some fields on the fly and alias them
        in the returned result. For example, if the query requests the `position`
        field, Supermetrics may return an `Average position` calculated field.
        For this reason we take column names from the actual results rather than
        from input fields.

        Args:
            if_empty (str, optional): What to do if query returned no data.
                Defaults to "warn".

        Returns:
            pd.DataFrame: The DataFrame containing query results.
        """
        try:
            columns = self._get_col_names()
        except ValueError:
            # Column names could not be determined; let pandas use its defaults.
            columns = None

        data = self.to_json()["data"]

        if data:
            # The first row of "data" is skipped; empty strings become NaN.
            df = pd.DataFrame(data[1:], columns=columns).replace("", np.nan)
        else:
            df = pd.DataFrame(columns=columns)

        if df.empty:
            self._handle_if_empty(if_empty)

        return df

    def query(self, params: Dict[str, Any]):
        """Set the query parameters and return the instance for chaining.

        The API key from the credentials is added to the stored parameters
        under the `api_key` entry.

        Args:
            params (Dict[str, Any]): The parameters of the query.

        Returns:
            Supermetrics: This instance, with `query_params` set.
        """
        # Store a copy so the caller's dictionary is not mutated and the API key
        # does not leak into it.
        self.query_params = {**params, "api_key": self.credentials.get("api_key")}
        return self
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you want to check the number of rows, you can just do:
assert len(df) == 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed the assert of the function output
[supermetrics-migration]: Done Pull Request Corrections