Feat/date range option for all readers #83

Merged: 7 commits, Jan 18, 2021
1 change: 1 addition & 0 deletions docs/source/readers.rst
@@ -128,6 +128,7 @@ Options Definition
``--adobe-2-0-metric`` Metric to include in the report
``--adobe-2-0-start-date`` Start date of the period to request (format: YYYY-MM-DD)
``--adobe-2-0-end-date`` End date of the period to request (format: YYYY-MM-DD)
``--adobe-2-0-date-range`` Date range to request. Not natively available in the Adobe API, so it must be one of the NCK default values: YESTERDAY, LAST_7_DAYS, PREVIOUS_WEEK, PREVIOUS_MONTH, LAST_90_DAYS
================================== =================================================================================================================================================================================
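
For illustration (not part of the documentation change above), a minimal sketch of what each NCK default value resolves to, using the helper introduced by this PR; the concrete dates depend on the day the snippet is run:

# Illustrative sketch, not part of this PR's diff.
from nck.utils.date_handler import get_date_start_and_date_stop_from_date_range

for date_range in ("YESTERDAY", "LAST_7_DAYS", "PREVIOUS_WEEK", "PREVIOUS_MONTH", "LAST_90_DAYS"):
    start, stop = get_date_start_and_date_stop_from_date_range(date_range)
    print(f"{date_range}: {start.isoformat()} -> {stop.isoformat()}")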

----------------------
98 changes: 43 additions & 55 deletions nck/readers/adobe_reader_2_0.py
@@ -16,27 +16,29 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import logging
import click
import json
import requests
import logging
import time
from itertools import chain
from datetime import timedelta
from itertools import chain

from nck.utils.retry import retry
from nck.utils.args import extract_args
from nck.commands.command import processor
from nck.readers.reader import Reader
import click
import requests
from click.exceptions import ClickException
from nck.clients.adobe_client import AdobeClient
from nck.streams.json_stream import JSONStream
from nck.commands.command import processor
from nck.helpers.adobe_helper_2_0 import (
APIRateLimitError,
add_metric_container_to_report_description,
get_node_values_from_response,
get_item_ids_from_nodes,
get_node_values_from_response,
parse_response,
)
from nck.readers.reader import Reader
from nck.streams.json_stream import JSONStream
from nck.utils.args import extract_args
from nck.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS, get_date_start_and_date_stop_from_date_range
from nck.utils.retry import retry

DATEFORMAT = "%Y-%m-%dT%H:%M:%S"
API_WINDOW_DURATION = 6
@@ -108,16 +110,19 @@ def format_key_if_needed(ctx, param, value):
)
@click.option(
"--adobe-2-0-start-date",
required=True,
type=click.DateTime(),
help="Start date of the report",
)
@click.option(
"--adobe-2-0-end-date",
required=True,
type=click.DateTime(),
help="End date of the report",
)
@click.option(
"--adobe-2-0-date-range",
type=click.Choice(DEFAULT_DATE_RANGE_FUNCTIONS.keys()),
help=f"One of the available NCK default date ranges: {DEFAULT_DATE_RANGE_FUNCTIONS.keys()}",
)
@processor(
"adobe_2_0_client_id",
"adobe_2_0_client_secret",
@@ -143,21 +148,30 @@ def __init__(
metric,
start_date,
end_date,
date_range,
):
self.adobe_client = AdobeClient(
client_id, client_secret, tech_account_id, org_id, private_key
)
self.adobe_client = AdobeClient(client_id, client_secret, tech_account_id, org_id, private_key)
self.global_company_id = global_company_id
self.report_suite_id = report_suite_id
self.dimensions = list(dimension)
self.metrics = list(metric)
self.start_date = start_date
self.end_date = end_date + timedelta(days=1)
if end_date is not None:
self.end_date = end_date + timedelta(days=1)
else:
self.end_date = end_date
self.date_range = date_range
self.ingestion_tracker = []
self.node_values = {}

def build_date_range(self):
return f"{self.start_date.strftime(DATEFORMAT)}/{self.end_date.strftime(DATEFORMAT)}"
if self.start_date is not None and self.end_date is not None and self.date_range is None:
Contributor:

So do we raise an error when both the start/end dates and a date range are given by the user? What about checking that end - start matches the date range when all parameters are given?
By the way, it seems the same error would occur if only one of the start/end dates is None and the other one is not.

Contributor Author:

Yes, correct. This is documented in issue #67. We do this to avoid conflicts between a date range and two explicit dates. For example, if the user sets a start date and an end date but also a date range of LAST_7_DAYS, what would we do? We would have to give one parameter priority over the other, and that priority would be implicit.
Your suggestion would work, but in my opinion it would make things more complicated, so I prefer to leave it as is.

Contributor:

Yes, I agree, this is enough for now. The value/time ratio of the feature I mentioned is too low to consider it now.

return f"{self.start_date.strftime(DATEFORMAT)}/{self.end_date.strftime(DATEFORMAT)}"
elif self.start_date is None and self.end_date is None and self.date_range is not None:
start_date, end_date = get_date_start_and_date_stop_from_date_range(self.date_range)
return f"{start_date.strftime(DATEFORMAT)}/{(end_date + timedelta(days=1)).strftime(DATEFORMAT)}"
else:
raise ClickException("Dates are not defined properly. Please set start and end dates or a date range")

def build_report_description(self, metrics, breakdown_item_ids=[]):
"""
@@ -169,9 +183,7 @@ def build_report_description(self, metrics, breakdown_item_ids=[]):

rep_desc = {
"rsid": self.report_suite_id,
"globalFilters": [
{"type": "dateRange", "dateRange": self.build_date_range()}
],
"globalFilters": [{"type": "dateRange", "dateRange": self.build_date_range()}],
"metricContainer": {},
"dimension": f"variables/{self.dimensions[len(breakdown_item_ids)]}",
"settings": {"countRepeatInstances": "true", "limit": "5000"},
@@ -193,19 +205,11 @@ def throttle(self):

current_time = time.time()
self.ingestion_tracker.append(current_time)
window_ingestion_tracker = [
t
for t in self.ingestion_tracker
if t >= (current_time - API_WINDOW_DURATION)
]
window_ingestion_tracker = [t for t in self.ingestion_tracker if t >= (current_time - API_WINDOW_DURATION)]

if len(window_ingestion_tracker) >= API_REQUESTS_OVER_WINDOW_LIMIT:
sleep_time = (
window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time
)
logging.warning(
f"Throttling activated: sleeping for {sleep_time} seconds..."
)
sleep_time = window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time
logging.warning(f"Throttling activated: sleeping for {sleep_time} seconds...")
time.sleep(sleep_time)

@retry
@@ -251,9 +255,7 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}):
if first_response["totalPages"] > 1:
for page_nb in range(1, first_response["totalPages"]):
next_response = self.get_report_page(rep_desc, page_nb)
all_responses += [
parse_response(next_response, metrics, parent_dim_parsed)
]
all_responses += [parse_response(next_response, metrics, parent_dim_parsed)]

return chain(*all_responses)

@@ -264,17 +266,13 @@ def get_node_values(self, breakdown_item_ids):
For instance: {'daterangeday_1200001': 'Jan 1, 2020'}
"""

rep_desc = self.build_report_description(
metrics=["visits"], breakdown_item_ids=breakdown_item_ids
)
rep_desc = self.build_report_description(metrics=["visits"], breakdown_item_ids=breakdown_item_ids)
first_response = self.get_report_page(rep_desc)
node_values = get_node_values_from_response(first_response)

if first_response["totalPages"] > 1:
for page_nb in range(1, first_response["totalPages"]):
next_node_values = get_node_values_from_response(
self.get_report_page(rep_desc, page_nb)
)
next_node_values = get_node_values_from_response(self.get_report_page(rep_desc, page_nb))
node_values.update(next_node_values)

return node_values
@@ -333,13 +331,9 @@ def read_through_graph(self, graph=None, node=None):

# If no remaining node children to explore: get report
if len(path_to_node) == len(self.dimensions) - 1:
parent_dim_parsed = {
node.split("_")[0]: self.node_values[node] for node in path_to_node
}
parent_dim_parsed = {node.split("_")[0]: self.node_values[node] for node in path_to_node}
breakdown_item_ids = get_item_ids_from_nodes(path_to_node)
rep_desc = self.build_report_description(
self.metrics, breakdown_item_ids
)
rep_desc = self.build_report_description(self.metrics, breakdown_item_ids)
data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed)
yield from self.result_generator(data)

@@ -348,9 +342,7 @@ def read_through_graph(self, graph=None, node=None):
visited.append(node)

# Update unvisited_childs
unvisited_childs = [
child_node for child_node in graph[node] if child_node not in visited
]
unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited]

# Read through child node children
for child_node in unvisited_childs:
@@ -366,10 +358,6 @@ def read(self):
def read(self):

if len(self.dimensions) == 1:
yield JSONStream(
"results_" + self.report_suite_id, self.read_one_dimension()
)
yield JSONStream("results_" + self.report_suite_id, self.read_one_dimension())
elif len(self.dimensions) > 1:
yield JSONStream(
"results_" + self.report_suite_id, self.read_through_graph()
)
yield JSONStream("results_" + self.report_suite_id, self.read_through_graph())
16 changes: 5 additions & 11 deletions nck/readers/dbm_reader.py
@@ -33,7 +33,7 @@
from nck.streams.format_date_stream import FormatDateStream

from nck.utils.text import get_report_generator_from_flat_file, skip_last
from nck.utils.date_handler import get_date_start_and_date_stop_from_range
from nck.utils.date_handler import get_date_start_and_date_stop_from_date_range

from nck.helpers.dbm_helper import POSSIBLE_REQUEST_TYPES

@@ -78,9 +78,7 @@
"--dbm-day-range",
required=True,
default="LAST_7_DAYS",
type=click.Choice(
["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"]
),
type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"]),
)
@processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret")
def dbm(**kwargs):
@@ -146,9 +144,7 @@ def get_query_body(self):
body_q["reportDataStartTimeMs"] = 1000 * int(
(self.kwargs.get("start_date") + datetime.timedelta(days=1)).timestamp()
)
body_q["reportDataEndTimeMs"] = 1000 * int(
(self.kwargs.get("end_date") + datetime.timedelta(days=1)).timestamp()
)
body_q["reportDataEndTimeMs"] = 1000 * int((self.kwargs.get("end_date") + datetime.timedelta(days=1)).timestamp())
return body_q

def create_and_get_query(self):
@@ -190,14 +186,12 @@ def get_query_report(self, existing_query=True):
url = self.get_query_report_url(existing_query)
report = requests.get(url, stream=True)
if self.kwargs["query_param_type"] == "TYPE_REACH_AND_FREQUENCY" and self.kwargs["add_date_to_report"]:
start, stop = get_date_start_and_date_stop_from_range(self.kwargs["day_range"])
start, stop = get_date_start_and_date_stop_from_date_range(self.kwargs["day_range"])
column_dict = {
"date_start": start.strftime(self.kwargs.get("date_format")),
"date_stop": stop.strftime(self.kwargs.get("date_format")),
}
report_gen = get_report_generator_from_flat_file(
report.iter_lines(), add_column=True, column_dict=column_dict
)
report_gen = get_report_generator_from_flat_file(report.iter_lines(), add_column=True, column_dict=column_dict)
return skip_last(report_gen, 1)
else:
report_gen = get_report_generator_from_flat_file(report.iter_lines())
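
For illustration only, a sketch of how the renamed helper feeds the extra date columns added to TYPE_REACH_AND_FREQUENCY reports; the column names come from the diff above, while the date format here is an assumption (the reader actually takes it from its date_format option):

# Illustrative sketch, not part of this PR's diff.
from nck.utils.date_handler import get_date_start_and_date_stop_from_date_range

start, stop = get_date_start_and_date_stop_from_date_range("LAST_7_DAYS")
column_dict = {
    "date_start": start.strftime("%Y-%m-%d"),  # assumed format; the reader uses its date_format option
    "date_stop": stop.strftime("%Y-%m-%d"),
}
# column_dict is then passed to get_report_generator_from_flat_file(..., add_column=True, column_dict=column_dict)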
62 changes: 46 additions & 16 deletions nck/utils/date_handler.py
@@ -3,19 +3,49 @@
from typing import Tuple


def get_date_start_and_date_stop_from_range(
date_range: str
) -> Tuple[date, date]:
today = date.today()
if date_range == "PREVIOUS_MONTH":
last_day_of_previous_month = \
today.replace(day=1) - timedelta(days=1)
year = last_day_of_previous_month.year
month = last_day_of_previous_month.month
return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1])
elif date_range == "PREVIOUS_WEEK":
# The API uses the American standard: weeks go from Sunday to the next Saturday
first_day_of_last_week = today - timedelta(days=today.weekday() + 1, weeks=1)
return first_day_of_last_week, first_day_of_last_week + timedelta(days=6)
else:
return None
def __get_yesterday_date(current_date: date) -> Tuple[date, date]:
yesterday = current_date - timedelta(days=1)
return yesterday, yesterday


def __get_last_7d_dates(current_date: date) -> Tuple[date, date]:
return current_date - timedelta(days=8), current_date - timedelta(days=1)


def __get_last_90d_dates(current_date: date) -> Tuple[date, date]:
return current_date - timedelta(days=91), current_date - timedelta(days=1)


def __get_previous_week_dates(current_date: date) -> Tuple[date, date]:
first_day_of_last_week = current_date - timedelta(days=current_date.weekday(), weeks=1)
return first_day_of_last_week, first_day_of_last_week + timedelta(days=6)


def __get_previous_month_dates(current_date: date) -> Tuple[date, date]:
last_day_of_previous_month = current_date.replace(day=1) - timedelta(days=1)
year = last_day_of_previous_month.year
month = last_day_of_previous_month.month
return date(year, month, 1), date(year, month, calendar.monthrange(year, month)[1])


DEFAULT_DATE_RANGE_FUNCTIONS = {
"YESTERDAY": __get_yesterday_date,
"LAST_7_DAYS": __get_last_7d_dates,
"PREVIOUS_WEEK": __get_previous_week_dates,
"PREVIOUS_MONTH": __get_previous_month_dates,
"LAST_90_DAYS": __get_last_90d_dates,
}


def get_date_start_and_date_stop_from_date_range(date_range: str) -> Tuple[date, date]:
"""Returns date start and date stop based on the date range provided
and the current date.

Args:
date_range (str): One of the default date ranges that exist

Returns:
Tuple[date, date]: date start and date stop that match the date range
"""
current_date = date.today()
return DEFAULT_DATE_RANGE_FUNCTIONS[date_range](current_date)
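
A small illustrative example (not part of this PR) of how the new dispatch table behaves against a fixed reference date, calling the mapped functions directly instead of going through date.today():

# Illustrative sketch, not part of this PR's diff.
from datetime import date

from nck.utils.date_handler import DEFAULT_DATE_RANGE_FUNCTIONS

reference = date(2021, 1, 18)  # a Monday, for a deterministic output
for name, func in DEFAULT_DATE_RANGE_FUNCTIONS.items():
    start, stop = func(reference)
    print(f"{name}: {start} -> {stop}")
# For instance, PREVIOUS_WEEK resolves to 2021-01-11 -> 2021-01-17 (Monday to Sunday),
# since the new implementation is based on weekday() rather than the Sunday-based week of the old helper.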
25 changes: 7 additions & 18 deletions tests/readers/test_adobe_reader_2_0.py
@@ -36,6 +36,7 @@ class AdobeReaderTest_2_0(TestCase):
"metric": [],
"start_date": datetime.date(2020, 1, 1),
"end_date": datetime.date(2020, 1, 2),
"date_range": None,
}

@mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None)
@@ -78,9 +79,7 @@ def test_build_report_description_multiple_dimensions(self, mock_adobe_client):
metrics = ["visits", "bounces"]
breakdown_item_ids = ["000000000", "111111111"]

output = AdobeReader_2_0(**temp_kwargs).build_report_description(
metrics, breakdown_item_ids
)
output = AdobeReader_2_0(**temp_kwargs).build_report_description(metrics, breakdown_item_ids)
expected = {
"rsid": "XXXXXXXXX",
"globalFilters": [
@@ -163,9 +162,7 @@ def test_get_parsed_report(self, mock_adobe_client, mock_get_report_page):
)
metrics = ["visits", "bounces"]

output = AdobeReader_2_0(**temp_kwargs).get_parsed_report(
{"dimension": "variables/daterangeday"}, metrics
)
output = AdobeReader_2_0(**temp_kwargs).get_parsed_report({"dimension": "variables/daterangeday"}, metrics)
expected = [
{"daterangeday": "2020-01-01", "visits": 11, "bounces": 21},
{"daterangeday": "2020-01-02", "visits": 12, "bounces": 22},
@@ -192,9 +189,7 @@ def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values):
node = "daterangeday_1200201"
path_to_node = ["daterangeday_1200201"]

output = AdobeReader_2_0(**self.kwargs).add_child_nodes_to_graph(
graph, node, path_to_node
)
output = AdobeReader_2_0(**self.kwargs).add_child_nodes_to_graph(graph, node, path_to_node)
expected = {
"root": ["daterangeday_1200201", "daterangeday_1200202"],
"daterangeday_1200201": ["lasttouchchannel_1", "lasttouchchannel_2"],
@@ -212,13 +207,9 @@ def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values):
{"daterangeday": "2020-01-02", "visits": 12, "bounces": 22},
],
)
def test_read_one_dimension_reports(
self, mock_adobe_client, mock_get_parsed_report
):
def test_read_one_dimension_reports(self, mock_adobe_client, mock_get_parsed_report):
temp_kwargs = self.kwargs.copy()
temp_kwargs.update(
{"dimension": ["daterangeday"], "metric": ["visits", "bounces"]}
)
temp_kwargs.update({"dimension": ["daterangeday"], "metric": ["visits", "bounces"]})

output = next(AdobeReader_2_0(**temp_kwargs).read())
expected = [
@@ -289,9 +280,7 @@ def test_read_one_dimension_reports(
],
],
)
def test_read_multiple_dimension_reports(
self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report
):
def test_read_multiple_dimension_reports(self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report):
temp_kwargs = self.kwargs.copy()
temp_kwargs.update(
{