Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[migration2viatot2.0]: Migrated Supermetrics #671

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions tests/integration/test_supermetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from viadot.config import get_source_credentials
from viadot.sources import Supermetrics


def test_connection():
    """Run a minimal Google Ads query through Supermetrics and expect data back.

    The query asks for at most one row (`max_rows` is 1) of last month's data,
    using the `user` value read from the "supermetrics" source credentials.
    """
    credentials = get_source_credentials("supermetrics")
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    df = Supermetrics().query(google_ads_params).to_df()

    # Check the number of rows directly. The previous `df.count()[0]` looked up
    # a label-indexed Series by integer position (deprecated in pandas) and
    # counted only the non-null cells of the first column.
    assert len(df) > 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to check the number of rows, you can just use `assert len(df) == 1`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the assert of the function output
[supermetrics-migration]: Done Pull Request Corrections

122 changes: 122 additions & 0 deletions tests/unit/test_supermetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pytest

from viadot.sources import Supermetrics

# Canned Supermetrics response for a pivoted query: the "segment" field has
# `field_split` set to "column", and the first row of "data" lists the
# resulting column names.
RESPONSE_PIVOTED = {
    "meta": {
        "query": {
            "fields": [
                {
                    "id": "Date", "field_id": "Date", "field_name": "Date",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "profile", "field_id": "profile", "field_name": "View",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "segment", "field_id": "segment", "field_name": "Segment",
                    "field_type": "dim", "field_split": "column",
                },
                {
                    "id": "Sessions", "field_id": "Sessions", "field_name": "Sessions",
                    "field_type": "met", "field_split": "row",
                },
            ]
        },
        "result": {"total_columns": 6, "total_rows": 700},
    },
    "data": [
        # Header row.
        [
            "Date",
            "View",
            "M-site_TOTAL: Bounces Landing",
            "M-site_TOTAL: Click to EDSP",
            "M-site_TOTAL: MQL Conversion Page Sessions",
            "M-site_TOTAL: Click to RWS",
        ],
        # Single data row.
        ["2020-01-01", "REDACTED", 123, 456, 78, 9],
    ],
}

# Canned Supermetrics response for a pivoted query (the "segment" field has
# `field_split` set to "column") that returned no rows: "data" is empty.
RESPONSE_PIVOTED_NO_DATA = {
    "meta": {
        "query": {
            "fields": [
                {
                    "id": "Date", "field_id": "Date", "field_name": "Date",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "profileID", "field_id": "profileID", "field_name": "View ID",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "Hostname", "field_id": "Hostname", "field_name": "Hostname",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "profile", "field_id": "profile", "field_name": "View",
                    "field_type": "dim", "field_split": "row",
                },
                {
                    "id": "segment", "field_id": "segment", "field_name": "Segment",
                    "field_type": "dim", "field_split": "column",
                },
                {
                    "id": "Sessions", "field_id": "Sessions", "field_name": "Sessions",
                    "field_type": "met", "field_split": "row",
                },
            ]
        },
        "result": {"total_columns": 0, "total_rows": 0},
    },
    "data": [],
}


# TODO(review): a reviewer asked for additional unit tests covering to_json(),
# _get_col_names_other() and to_df().
def test___get_col_names_google_analytics_pivoted():
    """For a pivoted response, column names come from the first row of data."""
    expected_columns = [
        "Date",
        "View",
        "M-site_TOTAL: Bounces Landing",
        "M-site_TOTAL: Click to EDSP",
        "M-site_TOTAL: MQL Conversion Page Sessions",
        "M-site_TOTAL: Click to RWS",
    ]

    actual_columns = Supermetrics._get_col_names_google_analytics(
        response=RESPONSE_PIVOTED
    )

    assert actual_columns == expected_columns


def test___get_col_names_google_analytics_pivoted_no_data():
    """A pivoted response with an empty "data" list must raise ValueError."""
    empty_response = RESPONSE_PIVOTED_NO_DATA
    with pytest.raises(ValueError):
        Supermetrics._get_col_names_google_analytics(response=empty_response)
3 changes: 2 additions & 1 deletion viadot/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def get_source_config(key, config=CONFIG):
if source_configs is not None:
for source_config in source_configs:
if key in source_config.keys():
return source_configs[source_configs.index(source_config)][key]
# return source_configs[source_configs.index(source_config)][key] OBS!!!!!!!!!!!!!

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add this to CHANGELOG.md with issue number

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add/fix a test (in test_config.py)

return source_configs[source_configs.index(source_config)]


def get_source_credentials(key, config=CONFIG):
Expand Down
164 changes: 164 additions & 0 deletions viadot/sources/supermetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import json
import urllib
from copy import deepcopy
from typing import Any, Dict, List

import numpy as np
import pandas as pd

from ..config import get_source_credentials
from ..exceptions import CredentialError
from ..utils import handle_api_response
from .base import Source


Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add credentials definitions using Pydantic, as in the example below:

class DatabricksCredentials(BaseModel):

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

class Supermetrics(Source):
    """A source for downloading data through the Supermetrics API.

    Documentation for this API is located at:
    https://supermetrics.com/docs/product-api-getting-started/
    Usage limits: https://supermetrics.com/docs/product-api-usage-limits/

    Args:
        query_params (Dict[str, Any], optional): The parameters to pass to the GET
            query. See https://supermetrics.com/docs/product-api-get-data/ for the
            full specification. Defaults to None.
        credentials (Dict[str, Any], optional): Passed as a keyword argument. The
            `api_key` entry is used to authorize requests. When omitted or empty,
            the "supermetrics" source credentials from the config are used.
    """

    API_ENDPOINT = "https://api.supermetrics.com/enterprise/v2/query/data/json"

    def __init__(self, *args, query_params: Dict[str, Any] = None, **kwargs):
        # Only look the defaults up when the caller did not supply credentials.
        credentials = kwargs.pop("credentials", None)
        if not credentials:
            credentials = get_source_credentials("supermetrics")

        super().__init__(*args, credentials=credentials, **kwargs)

        if not self.credentials:
            self.logger.debug(
                "Supermetrics credentials not specified; "
                "API requests will not be authenticated."
            )

        self.query_params = query_params

    @classmethod
    def get_params_from_api_query(cls, url: str) -> Dict[str, Any]:
        """Extract the query parameters from a Supermetrics API query URL.

        The first query-string parameter of the URL is expected to hold the
        JSON-encoded query.

        Args:
            url (str): The API query URL. A bare query string
                (e.g. `json=%7B...%7D`) is accepted as well.

        Raises:
            ValueError: If the URL contains no query-string parameter, or the
                parameter is not valid JSON.

        Returns:
            Dict[str, Any]: The decoded query parameters.
        """
        # `import urllib` alone does not guarantee that the `urllib.parse`
        # submodule is loaded, so import what is needed explicitly.
        from urllib.parse import parse_qs, urlsplit

        # Fall back to the whole string when there is no "?" separator.
        query_string = urlsplit(url).query or url

        # `parse_qs` percent-decodes the values itself. Decoding the URL before
        # splitting would corrupt values containing an encoded "&" or "+".
        parsed = parse_qs(query_string)
        if not parsed:
            raise ValueError("Couldn't find any query parameters in the URL.")

        first_value = next(iter(parsed.values()))[0]
        return json.loads(first_value)

    @classmethod
    def from_url(cls, url: str, credentials: Dict[str, Any] = None) -> "Supermetrics":
        """Create a source from an existing Supermetrics API query URL.

        Args:
            url (str): The API query URL to take the query parameters from.
            credentials (Dict[str, Any], optional): The credentials to use. When
                omitted, the "supermetrics" source credentials from the config
                are used. Defaults to None.

        Returns:
            Supermetrics: A source with `query_params` set from the URL.
        """
        params = cls.get_params_from_api_query(url)
        # Use `cls` so that subclasses get an instance of their own type.
        return cls(credentials=credentials, query_params=params)

    def to_json(self, timeout=(3.05, 60 * 30)) -> Dict[str, Any]:
        """Download query results to a dictionary.

        Note that Supermetrics API will sometimes hang and not return any error
        message, so we're adding a timeout to GET.

        See [requests docs](https://docs.python-requests.org/en/master/user/advanced/#timeouts)
        for an explanation of why this timeout value will work on long-running
        queries but fail fast on connection issues.

        Args:
            timeout (tuple, optional): The timeout passed on to the request.
                Defaults to (3.05, 60 * 30).

        Raises:
            ValueError: If the query parameters have not been set.

        Returns:
            Dict[str, Any]: The JSON-decoded API response.
        """
        if not self.query_params:
            raise ValueError("Please build the query first")

        params = {"json": json.dumps(self.query_params)}
        headers = {"Authorization": f'Bearer {self.credentials.get("api_key")}'}

        response = handle_api_response(
            url=self.API_ENDPOINT, params=params, headers=headers, timeout=timeout
        )
        return response.json()

    @classmethod
    def _get_col_names_google_analytics(
        cls,
        response: dict,
    ) -> List[str]:
        """Return the column names of a Google Analytics response.

        Args:
            response (dict): The API response, as returned by `to_json()`.

        Raises:
            ValueError: If the query is pivoted and returned no data.

        Returns:
            List[str]: The column names.
        """
        # Supermetrics allows pivoting GA data, in which case it generates
        # additional columns, which are not listed in the response's query
        # metadata but are instead added as the first row of data.
        is_pivoted = any(
            field["field_split"] == "column"
            for field in response["meta"]["query"]["fields"]
        )

        if not is_pivoted:
            # Non-pivoted data; query fields match result fields.
            return cls._get_col_names_other(response)

        if not response["data"]:
            raise ValueError("Couldn't find column names as query returned no data.")
        return response["data"][0]

    @classmethod
    def _get_col_names_other(cls, response: dict) -> List[str]:
        """Return the column names of a response from the query metadata.

        For Google Analytics responses, use `_get_col_names_google_analytics()`.

        Args:
            response (dict): The API response, as returned by `to_json()`.

        Returns:
            List[str]: The `field_name` of each field listed in the metadata.
        """
        cols_meta = response["meta"]["query"]["fields"]
        return [col_meta["field_name"] for col_meta in cols_meta]

    def _get_col_names(self) -> List[str]:
        """Return the column names of the current query's results.

        The query is re-issued with `offset_start` and `offset_end` set to 0,
        and the column names are read from that response.

        Raises:
            ValueError: If the query parameters have not been set, or if the
                query is a pivoted Google Analytics query that returned no data.

        Returns:
            List[str]: The column names.
        """
        if not self.query_params:
            raise ValueError("Please build the query first")

        query_params_cp = deepcopy(self.query_params)
        query_params_cp["offset_start"] = 0
        query_params_cp["offset_end"] = 0

        # Reuse this instance's credentials. Building the helper without them
        # would silently switch to the default credentials from the config.
        response: dict = Supermetrics(
            credentials=self.credentials, query_params=query_params_cp
        ).to_json()

        if self.query_params.get("ds_id") == "GA":
            return Supermetrics._get_col_names_google_analytics(response)
        return Supermetrics._get_col_names_other(response)

    def to_df(self, if_empty: str = "warn") -> pd.DataFrame:
        """Download data into a pandas DataFrame.

        Note that Supermetrics can calculate some fields on the fly and alias
        them in the returned result. For example, if the query requests the
        `position` field, Supermetrics may return an `Average position`
        calculated field. For this reason we take column names from the actual
        results rather than from input fields.

        Args:
            if_empty (str, optional): What to do if the query returned no data.
                Defaults to "warn".

        Returns:
            pd.DataFrame: The DataFrame containing query results.
        """
        try:
            columns = self._get_col_names()
        except ValueError:
            # Column names could not be determined; let pandas pick defaults.
            columns = None

        data = self.to_json()["data"]

        if data:
            # The first row of `data` is skipped; in pivoted responses it holds
            # the column names. NOTE(review): presumably it is a header row for
            # every data source - confirm.
            df = pd.DataFrame(data[1:], columns=columns).replace("", np.nan)
        else:
            df = pd.DataFrame(columns=columns)

        if df.empty:
            self._handle_if_empty(if_empty)

        return df

    def query(self, params: Dict[str, Any]):
        """Set the parameters of the query to run.

        Args:
            params (Dict[str, Any]): The query parameters. The dictionary is
                copied, so the caller's object is left unmodified.

        Returns:
            Supermetrics: This instance, to allow chaining
                (e.g. `source.query(params).to_df()`).
        """
        # Build a new dict instead of writing the API key into the caller's one.
        self.query_params = {**params, "api_key": self.credentials.get("api_key")}
        return self