Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[migration2viatot2.0]: Migrated Supermetrics #671

Closed
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,14 @@ specified in the `SUPERMETRICS_DEFAULT_USER` secret

### Fixed
- Fix `AzureBlobStorage`'s `to_storage()` method is missing the final upload blob part

## [Unreleased] - 2023-04-20
### Added
- Sources:
- `Supermetrics`
- Tests:
- unit
- `test_supermetrics`
trymzet marked this conversation as resolved.
Show resolved Hide resolved
### Changed
- config.py
- `config.py`: detected a possible bug in `get_source_config` in viadot 2 (see #670)
21 changes: 21 additions & 0 deletions tests/integration/test_supermetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from viadot.config import get_source_credentials
from viadot.sources import Supermetrics


def test_connection():
    """Smoke-test live Supermetrics connectivity with a minimal Google Ads query.

    Requires credentials configured under the "supermetrics" source key.
    """
    credentials = get_source_credentials("supermetrics")
    s = Supermetrics()
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    df = s.query(google_ads_params).to_df()
    # max_rows is 1, so exactly one row is expected; len(df) states that
    # directly, unlike df.count()[0] > 0 (which also skips NaN cells).
    assert len(df) == 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you want to check the number of rows, you can just do assert len(df) == 1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the assertion on the function's output accordingly.
[supermetrics-migration]: Done Pull Request Corrections

215 changes: 215 additions & 0 deletions tests/unit/test_supermetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
import pytest
import pandas as pd

from viadot.sources import Supermetrics
from viadot.config import get_source_credentials

# Canned Supermetrics API response for a *pivoted* query: the "segment"
# field has field_split == "column", so each segment value becomes its own
# column in the data header row. The meta block reports 6 columns / 700
# rows, but only the header row plus one redacted data row are kept here.
RESPONSE_PIVOTED = {
    "meta": {
        "query": {
            "fields": [
                {
                    "id": "Date",
                    "field_id": "Date",
                    "field_name": "Date",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "profile",
                    "field_id": "profile",
                    "field_name": "View",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "segment",
                    "field_id": "segment",
                    "field_name": "Segment",
                    "field_type": "dim",
                    "field_split": "column",
                },
                {
                    "id": "Sessions",
                    "field_id": "Sessions",
                    "field_name": "Sessions",
                    "field_type": "met",
                    "field_split": "row",
                },
            ]
        },
        "result": {"total_columns": 6, "total_rows": 700},
    },
    "data": [
        [
            "Date",
            "View",
            "M-site_TOTAL: Bounces Landing",
            "M-site_TOTAL: Click to EDSP",
            "M-site_TOTAL: MQL Conversion Page Sessions",
            "M-site_TOTAL: Click to RWS",
        ],
        ["2020-01-01", "REDACTED", 123, 456, 78, 9],
    ],
}

# Same pivoted-query shape as RESPONSE_PIVOTED, but with an empty "data"
# list and zeroed totals — used to exercise the no-data error path.
RESPONSE_PIVOTED_NO_DATA = {
    "meta": {
        "query": {
            "fields": [
                {
                    "id": "Date",
                    "field_id": "Date",
                    "field_name": "Date",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "profileID",
                    "field_id": "profileID",
                    "field_name": "View ID",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "Hostname",
                    "field_id": "Hostname",
                    "field_name": "Hostname",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "profile",
                    "field_id": "profile",
                    "field_name": "View",
                    "field_type": "dim",
                    "field_split": "row",
                },
                {
                    "id": "segment",
                    "field_id": "segment",
                    "field_name": "Segment",
                    "field_type": "dim",
                    "field_split": "column",
                },
                {
                    "id": "Sessions",
                    "field_id": "Sessions",
                    "field_name": "Sessions",
                    "field_type": "met",
                    "field_split": "row",
                },
            ]
        },
        "result": {"total_columns": 0, "total_rows": 0},
    },
    "data": [],
}


def test___get_col_names_other():
    """_get_col_names_other() should return the query's field names,
    in query order, for a non-Google-Analytics response."""
    cols_list = Supermetrics._get_col_names_other(response=RESPONSE_PIVOTED)
    assert cols_list == ["Date", "View", "Segment", "Sessions"]


def test___get_col_names_google_analytics_pivoted():
    """For a pivoted Google Analytics response, _get_col_names_google_analytics()
    should take the column names from the header row of ``data``."""
    columns = Supermetrics._get_col_names_google_analytics(response=RESPONSE_PIVOTED)
    assert columns == [
        "Date",
        "View",
        "M-site_TOTAL: Bounces Landing",
        "M-site_TOTAL: Click to EDSP",
        "M-site_TOTAL: MQL Conversion Page Sessions",
        "M-site_TOTAL: Click to RWS",
    ]


def test___get_col_names_google_analytics_pivoted_no_data():
    """A pivoted Google Analytics response with no data rows must raise
    ValueError instead of returning column names."""
    empty_response = RESPONSE_PIVOTED_NO_DATA
    with pytest.raises(ValueError):
        Supermetrics._get_col_names_google_analytics(response=empty_response)


def test__query() -> None:
    """query() should attach the resolved credentials to the returned object.

    Note: the original signature declared ``-> bool`` although the test
    returns nothing; pytest tests conventionally return None.
    """
    credentials = get_source_credentials("supermetrics")
    s = Supermetrics()
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    assert s.query(google_ads_params).credentials == credentials


def test__to_json():
    """to_json() should return the raw API payload as a dict whose top-level
    keys are exactly "meta" and "data", in that order."""
    credentials = get_source_credentials("supermetrics")
    s = Supermetrics()
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    # Descriptive name instead of the builtin-shadow-avoiding "dict_".
    response_payload = s.query(google_ads_params).to_json()
    assert list(response_payload.keys()) == ["meta", "data"]


def test__to_df():
    """to_df() should return the query result as a pandas DataFrame."""
    credentials = get_source_credentials("supermetrics")
    s = Supermetrics()
    google_ads_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": credentials.get("user"),
        "date_range_type": "last_month",
        "fields": [
            "Date",
            "Campaignname",
            "Clicks",
        ],
        "max_rows": 1,
    }
    df = s.query(google_ads_params).to_df()
    # NOTE(review): these expected values are a snapshot of live account data
    # for "last_month" and will drift over time — consider asserting on the
    # frame's shape/columns instead. TODO confirm intent.
    df_expected = pd.DataFrame(
        {
            "Date": "2023-03-01",
            "Campaign name": "FR : Brand VELUX (Exact)",
            "Clicks": 749,
        },
        index=[0],
    )
    assert df.equals(df_expected)


def test___get_col_names():
    """_get_col_names() should resolve the query's field ids to their
    human-readable column names (e.g. "Campaignname" -> "Campaign name")."""
    creds = get_source_credentials("supermetrics")
    supermetrics = Supermetrics()
    query_params = {
        "ds_id": "AW",
        "ds_accounts": ["1007802423"],
        "ds_user": creds.get("user"),
        "date_range_type": "last_month",
        "fields": ["Date", "Campaignname", "Clicks"],
        "max_rows": 1,
    }
    column_names = supermetrics.query(query_params)._get_col_names()
    assert column_names == ["Date", "Campaign name", "Clicks"]
62 changes: 62 additions & 0 deletions tests/unit/test_velux_club.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
import pandas as pd

from viadot.sources import VeluxClub, Historical_Too_Old, Source_NOK, Dates_NOK
from viadot.config import get_source_credentials

def test_veluxclub_HISTORICAL_TOO_OLD_failure():
    """get_response() must raise Historical_Too_Old when from_date is older
    than the supported history window."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    # Keep setup outside the raises-block so only the call under test may
    # raise; the unused exc_info/df bindings were dropped.
    with pytest.raises(Historical_Too_Old):
        velux_club.get_response(
            source="product", from_date="2021-01-01", to_date="2023-01-31"
        )

def test_veluxclub_WRONG_SOURCE_failure():
    """get_response() must raise Source_NOK for an unknown source name."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    # Setup stays outside the raises-block; unused bindings removed.
    with pytest.raises(Source_NOK):
        velux_club.get_response(
            source="product1", from_date="2023-01-01", to_date="2023-01-31"
        )

def test_veluxclub_DATES_NOK_failure():
    """get_response() must raise Dates_NOK when from_date is after to_date."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    # Setup stays outside the raises-block; unused bindings removed.
    with pytest.raises(Dates_NOK):
        velux_club.get_response(
            source="product", from_date="2023-03-01", to_date="2023-01-31"
        )


def test_veluxclub_connexion_product():
    """Live check: the 'product' endpoint returns the expected schema and
    row count for January 2023."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    df = velux_club.get_response(
        source="product", from_date="2023-01-01", to_date="2023-01-31"
    )

    cols_expected = [
        "submissionProductID",
        "submissionID",
        "regionID",
        "productCode",
        "productQuantity",
        "submissionProductDate",
        "brand",
        "unit",
    ]
    # Separate asserts give a precise failure message for each condition.
    assert list(df.columns) == cols_expected
    # NOTE(review): row count is a snapshot of live data for a fixed window
    # and may drift if historical records change — confirm stability.
    assert len(df.index) == 2618

def test_veluxclub_connexion_survey():
    """Live check: the 'survey' endpoint returns the expected schema and
    row count for January 2023."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    df = velux_club.get_response(
        source="survey", from_date="2023-01-01", to_date="2023-01-31"
    )

    cols_expected = ["id", "type", "text"]
    # Separate asserts give a precise failure message for each condition.
    assert list(df.columns) == cols_expected
    # NOTE(review): row count is a snapshot of live data — confirm stability.
    assert len(df.index) == 18


def test_veluxclub_connexion_company():
    """Live check: the 'company' endpoint returns the expected schema and
    row count for January 2023."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    df = velux_club.get_response(
        source="company", from_date="2023-01-01", to_date="2023-01-31"
    )

    cols_expected = [
        "customerID",
        "companyName",
        "address1",
        "town",
        "postcode",
        "companyNumber",
        "country",
        "serviceTechnician",
        "areaManager",
        "ASEID",
        "customerType",
        "membershipDate",
        "status",
        "firstName",
        "lastName",
        "email",
        "msisdn",
        "languageID",
        "numberOfClosedProjects",
        "numberOfOpenProjects",
        "numberOfScans",
        "totalPoints",
        "pointsSpent",
        "currentPoints",
        "pendingPoints",
        "expiredPoints",
        "expiringPoints",
        "rewardsOrdered",
        "optinsms",
        "optinemail",
        "optinMarketing",
        "lastlogin",
        "totalLogins",
        "q1Core",
        "Column 1",
        "Column 2",
        "q2Core",
        "q3Core",
        "q4Core",
        "q5Core",
        "q6Core",
        "q7Core",
        "q8Core",
        "Column 4",
        "Column 3",
        "Column 5",
        "Column 6",
        "Column 7",
        "Column 8",
        "Column 9",
        "Column 10",
        "Column 11",
        "Column 12",
        "Column 13",
    ]
    # Separate asserts give a precise failure message for each condition.
    assert list(df.columns) == cols_expected
    # NOTE(review): row count is a snapshot of live data — confirm stability.
    assert len(df.index) == 112


def test_veluxclub_connexion_jobs():
    """Live check: the 'jobs' endpoint returns the expected schema and
    row count for January 2023."""
    credentials = get_source_credentials("velux_club")
    velux_club = VeluxClub(credentials=credentials)
    df = velux_club.get_response(
        source="jobs", from_date="2023-01-01", to_date="2023-01-31"
    )

    cols_expected = [
        "submissionID",
        "submissionAddress",
        "regionID",
        "submissionPostcode",
        "submissionDate",
        "customerID",
        "status",
        "submissionQuantity",
        "points",
        "q1Core",
        "q2Core",
        "q2CoreOther",
        "q3Core",
        "q4Core",
        "q5Core",
        "q5CoreOther",
        "q6Core",
        "q6CoreOther",
        "q7Core",
        "q7CoreOther",
        "q8Core",
        "q8CoreOther",
        "q9Core",
    ]
    # Separate asserts give a precise failure message for each condition.
    assert list(df.columns) == cols_expected
    # NOTE(review): row count is a snapshot of live data — confirm stability.
    assert len(df.index) == 743
3 changes: 2 additions & 1 deletion viadot/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from .redshift_spectrum import RedshiftSpectrum
from .s3 import S3
from .sharepoint import Sharepoint
from .genesys import Genesys
from .supermetrics import Supermetrics
from .velux_club import VeluxClub, Historical_Too_Old, Dates_NOK, Source_NOK

try:
from .sap_rfc import SAPRFC
Expand Down
Loading