Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheduled queries google dbm #158

Draft
wants to merge 7 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions ack/readers/google_dbm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import click
from ack.readers.google_dbm.config import POSSIBLE_REQUEST_TYPES
from ack.readers.google_dbm.config import POSSIBLE_REQUEST_TYPES, POSSIBLE_FREQUENCIES, POSSIBLE_TIMEZONE_CODES
from ack.readers.google_dbm.reader import GoogleDBMReader
from ack.utils.args import extract_args
from ack.utils.processor import processor
Expand All @@ -33,7 +33,10 @@
@click.option("--dbm-request-type", type=click.Choice(POSSIBLE_REQUEST_TYPES), required=True)
@click.option("--dbm-query-id")
@click.option("--dbm-query-title")
@click.option("--dbm-query-frequency", default="ONE_TIME")
@click.option("--dbm-query-frequency", type=click.Choice(POSSIBLE_FREQUENCIES), default="ONE_TIME")
@click.option("--dbm-query-timezone-code", type=click.Choice(POSSIBLE_TIMEZONE_CODES), default="America/New_York")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add this new option to the documentation? A small guide on how to use Sphinx is available here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes thank you

@click.option("--dbm-scheduled-start-date", type=click.DateTime())
@click.option("--dbm-scheduled-end-date", type=click.DateTime())
@click.option("--dbm-query-param-type", default="TYPE_TRUEVIEW")
@click.option("--dbm-start-date", type=click.DateTime())
@click.option("--dbm-end-date", type=click.DateTime())
Expand Down
22 changes: 21 additions & 1 deletion ack/readers/google_dbm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,26 @@
GOOGLE_TOKEN_URI = "https://accounts.google.com/o/oauth2/token"

DAY_RANGES = ("PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK")
# Values accepted by the --dbm-query-frequency option; the selected value is
# sent as the "frequency" of the query schedule.
POSSIBLE_FREQUENCIES = ("DAILY", "MONTHLY", "ONE_TIME", "QUARTERLY", "SEMI_MONTHLY", "WEEKLY")
# Values accepted by the --dbm-query-timezone-code option; the selected value
# is sent as the "nextRunTimezoneCode" of a scheduled query.
POSSIBLE_TIMEZONE_CODES = (
"Africa/Johannesburg",
"America/Los_Angeles",
"America/New_York",
"America/Sao_Paulo",
"Asia/Dubai",
"Asia/Hong_Kong",
"Asia/Jerusalem",
"Asia/Shanghai",
"Asia/Tokyo",
"Australia/Sydney",
"Europe/London",
"Europe/Paris",
"Pacific/Auckland",
)
POSSIBLE_REQUEST_TYPES = [
"existing_query",
"custom_query",
"custom_scheduled_query",
"existing_query_report",
"custom_query_report",
"lineitems_objects",
Expand All @@ -43,7 +60,10 @@ class GoogleDBMReaderConfig(BaseModel):
request_type: Literal[tuple(POSSIBLE_REQUEST_TYPES)]
query_id: str = None
query_title: str = None
query_frequency: str = "ONE_TIME"
# Schedule-related fields. Every one of them carries a default so that
# configurations for non-scheduled request types still validate without
# providing them; the defaults mirror the ones declared on the CLI options.
query_frequency: Literal[tuple(POSSIBLE_FREQUENCIES)] = "ONE_TIME"
query_timezone_code: Literal[tuple(POSSIBLE_TIMEZONE_CODES)] = "America/New_York"
# Only meaningful for the "custom_scheduled_query" request type, for which the
# reader checks that both bounds are provided and consistently ordered.
scheduled_start_date: datetime = None
scheduled_end_date: datetime = None
query_param_type: str = "TYPE_TRUEVIEW"
start_date: datetime = None
end_date: datetime = None
Expand Down
48 changes: 38 additions & 10 deletions ack/readers/google_dbm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
from ack.readers.google_dbm.config import GOOGLE_TOKEN_URI
from ack.readers.reader import Reader
from ack.streams.format_date_stream import FormatDateStream
from ack.utils.date_handler import check_date_range_definition_conformity, get_date_start_and_date_stop_from_date_range
from ack.utils.date_handler import (
check_date_range_definition_conformity,
check_scheduled_parameters_definition_conformity,
get_date_start_and_date_stop_from_date_range,
)
from ack.utils.text import get_report_generator_from_flat_file, skip_last
from oauth2client import GOOGLE_REVOKE_URI, client
from tenacity import retry, stop_after_delay, wait_exponential
Expand Down Expand Up @@ -55,9 +59,19 @@ def __init__(self, access_token, refresh_token, client_secret, client_id, **kwar

self.kwargs = kwargs

check_date_range_definition_conformity(
self.kwargs.get("start_date"), self.kwargs.get("end_date"), self.kwargs.get("day_range")
)
is_scheduled_report = self.kwargs.get("request_type") == "custom_scheduled_query"

if not is_scheduled_report:
check_date_range_definition_conformity(
self.kwargs.get("start_date"), self.kwargs.get("end_date"), self.kwargs.get("day_range")
)
else:
check_scheduled_parameters_definition_conformity(
self.kwargs.get("scheduled_start_date"),
self.kwargs.get("scheduled_end_date"),
self.kwargs.get("query_frequency"),
self.kwargs.get("day_range"),
)

def get_query(self, query_id):
if query_id:
Expand All @@ -73,7 +87,8 @@ def get_existing_query(self):
else:
raise ClickException(f"No query found with the id {query_id}")

def get_query_body(self):
def get_query_body(self, is_scheduled):
scheduled_body = self.create_scheduled_body(is_scheduled)
body_q = {
"kind": "doubleclickbidmanager#query",
"metadata": {
Expand All @@ -87,18 +102,29 @@ def get_query_body(self):
"metrics": list(self.kwargs.get("query_metric", [])),
"filters": [{"type": filt[0], "value": str(filt[1])} for filt in self.kwargs.get("filter")],
},
"schedule": {"frequency": self.kwargs.get("query_frequency", "ONE_TIME")},
"schedule": scheduled_body,
}
if self.kwargs.get("start_date") is not None and self.kwargs.get("end_date") is not None:
if not is_scheduled and self.kwargs.get("start_date") is not None and self.kwargs.get("end_date") is not None:
body_q["metadata"]["dataRange"] = "CUSTOM_DATES"
body_q["reportDataStartTimeMs"] = 1000 * int(
(self.kwargs.get("start_date") + datetime.timedelta(days=1)).timestamp()
)
body_q["reportDataEndTimeMs"] = 1000 * int((self.kwargs.get("end_date") + datetime.timedelta(days=1)).timestamp())
return body_q

def create_and_get_query(self):
body_query = self.get_query_body()
def create_scheduled_body(self, is_scheduled):
    """Build the "schedule" section of the query body.

    Args:
        is_scheduled: when falsy, a plain ONE_TIME schedule is returned;
            otherwise the schedule is built from the reader's keyword
            arguments (frequency, timezone code, start and end dates).

    Returns:
        dict: the schedule description to embed in the query body.
    """
    if is_scheduled:
        one_day = datetime.timedelta(days=1)
        # Both bounds are shifted forward by one day before being converted
        # to epoch milliseconds.
        end_shifted = self.kwargs.get("scheduled_end_date") + one_day
        start_shifted = self.kwargs.get("scheduled_start_date") + one_day
        return {
            "frequency": self.kwargs.get("query_frequency"),
            "nextRunTimezoneCode": self.kwargs.get("query_timezone_code"),
            "endTimeMs": 1000 * int(end_shifted.timestamp()),
            "startTimeMs": 1000 * int(start_shifted.timestamp()),
        }
    return {"frequency": "ONE_TIME"}

def create_and_get_query(self, is_scheduled=False):
    """Create a query through the API client and return the API response.

    Args:
        is_scheduled: forwarded to ``get_query_body`` to select between a
            one-time and a scheduled query body.

    Returns:
        The result of executing the ``createquery`` request.
    """
    payload = self.get_query_body(is_scheduled)
    return self._client.queries().createquery(body=payload).execute()

Expand Down Expand Up @@ -176,7 +202,9 @@ def read(self):
if request_type == "existing_query":
data = [self.get_existing_query()]
elif request_type == "custom_query":
data = [self.create_and_get_query()]
data = [self.create_and_get_query(is_scheduled=False)]
elif request_type == "custom_scheduled_query":
data = [self.create_and_get_query(is_scheduled=True)]
elif request_type == "existing_query_report":
data = self.get_query_report(existing_query=True)
elif request_type == "custom_query_report":
Expand Down
18 changes: 18 additions & 0 deletions ack/utils/date_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,24 @@ def check_date_range_definition_conformity(start_date: date, end_date: date, dat
raise DateDefinitionException("Report end date should be equal or ulterior to report start date.")


def check_scheduled_parameters_definition_conformity(
scheduled_start_date: date, scheduled_end_date: date, frequency: str, date_range: str
):

if not date_range:
Copy link
Contributor

@pol-defont-reaulx pol-defont-reaulx Jul 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can see, this is almost the same code as check_date_range_definition_conformity. Would it be possible to run the new checks first and then call check_date_range_definition_conformity from the new function? That would avoid code duplication.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic is not the same for a scheduled report as for a normal report.
In a normal report, we must not have both a date range and a pair of dates.
In a scheduled report, both are required.

I could indeed call check_date_range_definition_conformity(scheduled_start_date, scheduled_end_date, None) after my else, but that seems to me like a "hack" whose only purpose is to avoid code duplication, and it would not explain the logic behind the check.

What do you think @AlexisVLRT ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems logical to me, thanks for the explanation

raise DateDefinitionException("You must define a date_range for a scheduled report")
elif not frequency:
raise DateDefinitionException("You must define a frequency for a scheduled report")
else:
if not all([scheduled_start_date, scheduled_end_date]):
raise DateDefinitionException("You must define a couple (scheduled-start-date, scheduled-end-date)")
elif scheduled_end_date < scheduled_start_date:
raise DateDefinitionException(
"Report scheduled-end-date should be equal or ulterior to report \
scheduled-start-date."
)


def get_date_start_and_date_stop_from_date_range(date_range: str) -> Tuple[date, date]:
"""Returns date start and date stop based on the date range provided
and the current date.
Expand Down
5 changes: 4 additions & 1 deletion docs/source/readers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,10 +724,13 @@ CMD Options JSON Options Definition
``--dbm-client-secret`` ``client_secret`` OAuth2 secret
``--dbm-access-token`` ``access_token`` (Optional) Access token for OAuth2
``--dbm-refresh-token`` ``refresh_token`` Refresh token for OAuth2
``--dbm-query-request-type`` ``query_request_type`` Doubleclick Bid Manager API request type. Possible values: existing_query, custom_query, existing_query_report, custom_query_report, lineitems_objects, sdf_objects and list_reports.
``--dbm-request-type`` ``request_type`` DoubleClick Bid Manager API request type. Possible values: existing_query, custom_query, existing_query_report, custom_query_report, custom_scheduled_query, lineitems_objects, sdf_objects and list_reports.
``--dbm-query-id`` ``query_id`` Query ID.
``--dbm-query-title`` ``query_title`` Query title, used to name the reports generated from this query in DV360 UI.
``--dbm-query-frequency`` ``query_frequency`` How often the query is run. Possible values can be found `here <https://developers.google.com/bid-manager/v1/queries#schedule.frequency>`__. Default: ONE_TIME.
``--dbm-query-timezone-code`` ``query_timezone_code`` Canonical timezone code for report generation time. Defaults to America/New_York.
pol-defont-reaulx marked this conversation as resolved.
Show resolved Hide resolved
``--dbm-scheduled-start-date`` ``scheduled_start_date`` Date on which the scheduled query starts running, given as a date string (e.g. YYYY-MM-DD). Required when the request type is custom_scheduled_query. Not applicable to ONE_TIME frequency.
``--dbm-scheduled-end-date`` ``scheduled_end_date`` Date on which the scheduled query stops running, given as a date string (e.g. YYYY-MM-DD). Must be equal to or later than the scheduled start date.
``--dbm-filter`` ``filter`` (list(tuple)) <FILTER_TYPE> <FILTER_VALUE> association, used to narrow the scope of the report. For instance "FILTER_ADVERTISER XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. Possible filter types can be found `here <https://developers.google.com/bid-manager/v1/filters-metrics#filters>`__.
``--dbm-query-dimension`` ``query_dimension`` (list) Dimensions to include in the report. Possible values can be found `here <https://developers.google.com/bid-manager/v1/filters-metrics#filters>`__.
``--dbm-query-metric`` ``query_metric`` (list) Metrics to include in the report. Possible values can be found `here <https://developers.google.com/bid-manager/v1/filters-metrics#metrics>`__.
Expand Down
37 changes: 34 additions & 3 deletions tests/readers/google_dbm/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def mock_dbm_reader(self, **kwargs):
setattr(self, param, value)

@mock.patch.object(GoogleDBMReader, "__init__", mock_dbm_reader)
def test_get_query_body(self):
def test_get_query_body_not_scheduled(self):
kwargs = {}
reader = GoogleDBMReader(**kwargs)
reader.kwargs = {"filter": [("FILTER_ADVERTISER", 1)]}
Expand All @@ -46,7 +46,7 @@ def test_get_query_body(self):
"schedule": {"frequency": "ONE_TIME"},
}

self.assertDictEqual(reader.get_query_body(), expected_query_body)
self.assertDictEqual(reader.get_query_body(is_scheduled=False), expected_query_body)

@mock.patch.object(GoogleDBMReader, "__init__", mock_dbm_reader)
def test_get_query_body_ms_conversion(self):
Expand All @@ -71,4 +71,35 @@ def test_get_query_body_ms_conversion(self):
"reportDataStartTimeMs": 1579132800000,
"reportDataEndTimeMs": 1579392000000,
}
self.assertDictEqual(reader.get_query_body(), expected_query_body)
self.assertDictEqual(reader.get_query_body(is_scheduled=False), expected_query_body)

@mock.patch.object(GoogleDBMReader, "__init__", mock_dbm_reader)
def test_get_scheduled_query_body_ms_conversion(self):
    """A scheduled query body carries its schedule bounds as epoch milliseconds."""
    utc = datetime.timezone.utc
    reader = GoogleDBMReader()
    reader.kwargs = {
        "filter": [("FILTER_ADVERTISER", 1)],
        "scheduled_start_date": datetime.datetime(2020, 1, 15, tzinfo=utc),
        "scheduled_end_date": datetime.datetime(2020, 1, 18, tzinfo=utc),
        "day_range": "LAST_7_DAYS",
        "query_timezone_code": "America/New_York",
        "query_frequency": "DAILY",
    }

    expected_schedule = {
        "frequency": "DAILY",
        "nextRunTimezoneCode": "America/New_York",
        "endTimeMs": 1579392000000,
        "startTimeMs": 1579132800000,
    }
    expected_params = {
        "type": "TYPE_TRUEVIEW",
        "groupBys": [],
        "metrics": [],
        "filters": [{"type": "FILTER_ADVERTISER", "value": "1"}],
    }
    expected_query_body = {
        "kind": "doubleclickbidmanager#query",
        "metadata": {"format": "CSV", "title": "NO_TITLE_GIVEN", "dataRange": "LAST_7_DAYS"},
        "params": expected_params,
        "schedule": expected_schedule,
    }
    self.assertDictEqual(reader.get_query_body(is_scheduled=True), expected_query_body)
46 changes: 46 additions & 0 deletions tests/utils/test_date_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from freezegun import freeze_time
from ack.utils.date_handler import (
check_date_range_definition_conformity,
check_scheduled_parameters_definition_conformity,
get_date_start_and_date_stop_from_date_range,
build_date_range,
)
Expand Down Expand Up @@ -85,3 +86,48 @@ def test_build_date_range_with_dates(self):
self.assertTupleEqual(
build_date_range(datetime(2021, 1, 1), datetime(2021, 1, 31), None), (datetime(2021, 1, 1), datetime(2021, 1, 31))
)

def test_check_scheduled_parameters_definition_missing_frequency(self):
    """A scheduled report without a frequency is rejected."""
    start, end = datetime(2021, 1, 12), datetime(2021, 1, 31)
    with self.assertRaises(DateDefinitionException):
        check_scheduled_parameters_definition_conformity(start, end, None, "LAST_7_DAYS")

def test_check_scheduled_parameters_definition_missing_date_range(self):
    """A scheduled report without a date range is rejected."""
    start, end = datetime(2021, 1, 12), datetime(2021, 1, 31)
    with self.assertRaises(DateDefinitionException):
        check_scheduled_parameters_definition_conformity(start, end, "DAILY", None)

@parameterized.expand(
    [
        # Missing start bound, with the end bound given as a date and as a datetime.
        (None, date(2021, 1, 12), "DAILY", "YESTERDAY"),
        (None, datetime(2021, 1, 12), "DAILY", "YESTERDAY"),
        # Missing end bound, with the start bound given as a date and as a datetime.
        (date(2021, 1, 12), None, "DAILY", "YESTERDAY"),
        (datetime(2021, 1, 12), None, "DAILY", "YESTERDAY"),
        # Both bounds missing.
        (None, None, "DAILY", "YESTERDAY"),
    ]
)
def test_check_scheduled_parameters_definition_missing_dates(
    self, scheduled_start_date, scheduled_end_date, frequency, date_range
):
    """A scheduled report is rejected unless both schedule bounds are given."""
    with self.assertRaises(DateDefinitionException):
        check_scheduled_parameters_definition_conformity(
            scheduled_start_date, scheduled_end_date, frequency, date_range
        )

def test_check_scheduled_parameters_definition_inconsistent(self):
    """A scheduled end date earlier than the scheduled start date is rejected."""
    start, end = date(2021, 1, 12), date(2021, 1, 11)
    with self.assertRaises(DateDefinitionException):
        check_scheduled_parameters_definition_conformity(start, end, "DAILY", "YESTERDAY")

def test_check_scheduled_parameters_definition_conformity(self):
    """Consistent scheduled parameters pass the check, which returns None."""
    start, end = datetime(2021, 1, 12), datetime(2021, 1, 31)
    outcome = check_scheduled_parameters_definition_conformity(start, end, "DAILY", "LAST_7_DAYS")
    self.assertIsNone(outcome)