diff --git a/.env b/.env index 438ecf16..2cf00bdb 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit -DOCKER_TAG=1.4.0 +DOCKER_IMAGE=nautilus-connectors-kit-dev +DOCKER_TAG=v1.1 DOCKER_REGISTRY=eu.gcr.io diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/buildtogcp.yml index 602177d6..7b59a7e2 100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/buildtogcp.yml @@ -34,13 +34,12 @@ on: # Environment variables available to all jobs and steps in this workflow env: - GCP_PROJECT: ${{ secrets.GCP_PROJECT }} GCP_EMAIL: ${{ secrets.GCP_EMAIL }} PROJECT_ID: ${{ secrets.PROJECT_ID }} DOCKER_TAG: ${{ github.run_id }} DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} DOCKER_IMAGE: ${{ secrets.DOCKER_IMAGE }}-${{ github.ref }} - + CLOUDSDK_PYTHON_SITEPACKAGES: 1 jobs: setup-build-publish: @@ -55,7 +54,7 @@ jobs: # Setup gcloud CLI - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master with: - version: '270.0.0' + version: '290.0.1' service_account_email: ${{ secrets.GCP_EMAIL }} service_account_key: ${{ secrets.GCP_KEY }} diff --git a/README.md b/README.md index 2c3e9fa0..e61b141c 100644 --- a/README.md +++ b/README.md @@ -6,23 +6,25 @@ Nautilus connectors kit is a tool which aim is getting raw data from different s ### Readers -- Google DoubleClick Manager (DBM / DV360) -- Google Campaign Manager (CM / DCM) -- Google Search Ads 360 (SA360) +- Adobe Analytics 1.4 +- Adobe Analytics 2.0 +- Amazon S3 +- Facebook Marketing +- Google Ads - Google Analytics -- Google Search Console -- Google Sheets - Google Cloud Storage -- Google Adwords +- Google Campaign Manager +- Google Display & Video 360 +- Google Search Ads 360 - Google Search Console -- Facebook Business Manager -- Amazon S3 +- Google Sheets - Oracle -- SalesForce - MySQL - Radarly -- Adobe Analytics 1.4 -- Yandex +- SalesForce +- Twitter Ads +- Yandex Campaign +- Yandex Statistics ### Writers @@ -97,4 +99,4 @@ It is 
# GNU Lesser General Public License v3.0 only
# Copyright (C) 2020 Artefact
# licence-information@artefact.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import logging
from datetime import datetime, timedelta
import requests
import jwt
from tenacity import retry, wait_exponential, stop_after_delay

IMS_HOST = "ims-na1.adobelogin.com"
IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt"

logging.basicConfig(level="INFO")
logger = logging.getLogger()


class AdobeClient:
    """
    Adobe client performing JWT authentication.

    On construction, a signed JWT is built from the supplied credentials and
    immediately exchanged for an access token at IMS_EXCHANGE. The resulting
    token is stored on `access_token` and reused by `build_request_headers`.

    Doc: https://github.com/AdobeDocs/adobeio-auth/blob/stage/JWT/JWT.md
    Most of the code is taken from this repo:
    https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python
    """

    # Seconds a single token-exchange attempt may take before it is abandoned.
    # Without a timeout a stalled connection would block forever and the retry
    # deadline configured on get_access_token would never be evaluated.
    REQUEST_TIMEOUT = 60

    def __init__(self, client_id, client_secret, tech_account_id, org_id, private_key):
        """
        Store the credentials, then build the JWT and fetch the access token.

        Note that this constructor performs a network call (token exchange).
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.tech_account_id = tech_account_id
        self.org_id = org_id
        self.private_key = private_key

        # Creating jwt_token attribute
        logger.info("Getting jwt_token.")
        self.jwt_token = jwt.encode(
            {
                # The JWT is only needed for the exchange below: 30 s validity
                "exp": datetime.utcnow() + timedelta(seconds=30),
                "iss": self.org_id,
                "sub": self.tech_account_id,
                f"https://{IMS_HOST}/s/ent_analytics_bulk_ingest_sdk": True,
                "aud": f"https://{IMS_HOST}/c/{self.client_id}",
            },
            self.private_key,
            algorithm="RS256",
        )

        # Creating access_token attribute
        logger.info("Getting access_token.")
        self.access_token = self.get_access_token()

    @retry(wait=wait_exponential(multiplier=60, min=60, max=1200), stop=stop_after_delay(3600))
    def get_access_token(self):
        """
        Exchange the JWT for an access token and return it.

        Any exception (HTTP error status, timeout, malformed payload) makes
        the decorator retry with exponential waits, until the 3600 s deadline.
        """
        post_body = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "jwt_token": self.jwt_token,
        }
        response = requests.post(IMS_EXCHANGE, data=post_body, timeout=self.REQUEST_TIMEOUT)
        # Fail with the real HTTP error instead of a KeyError on the payload
        response.raise_for_status()
        return response.json()["access_token"]

    def build_request_headers(self, global_company_id):
        """
        Build request headers to be used to interact with Adobe Analytics APIs 2.0.

        Returns a dict carrying the bearer token, the client ID as API key and
        the given global company ID.
        """
        return {
            "Accept": "application/json",
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": "application/json",
            "x-api-key": self.client_id,
            "x-proxy-global-company-id": global_company_id,
        }
import logging
from datetime import datetime

logging.basicConfig(level="INFO")
logger = logging.getLogger()


class APIRateLimitError(Exception):
    """
    Exception type named for API rate-limit failures.

    The message is logged at ERROR level as soon as the exception is
    instantiated (i.e. even before it is raised).
    """

    def __init__(self, message):
        super().__init__(message)
        logger.error(message)


def add_metric_container_to_report_description(
    rep_desc, dimensions, metrics, breakdown_item_ids
):
    """
    Filling the metricContainer section of a report description:
    - Creates 1 filter per dimension breakdown x metric
    - Applies filters to each metric

    Filter ids are numbered metric by metric: metric j owns the ids
    j * nb_breakdowns ... (j + 1) * nb_breakdowns - 1.
    The i-th breakdown item id is paired with the i-th dimension.

    rep_desc must already contain a "metricContainer" dict; it is modified
    in place and also returned.
    """

    nb_breakdowns = len(breakdown_item_ids)
    metric_container = rep_desc["metricContainer"]

    metric_container["metricFilters"] = [
        {
            "id": metric_idx * nb_breakdowns + dim_idx,
            "type": "breakdown",
            "dimension": f"variables/{dimensions[dim_idx]}",
            "itemId": item_id,
        }
        for metric_idx in range(len(metrics))
        for dim_idx, item_id in enumerate(breakdown_item_ids)
    ]

    metric_container["metrics"] = [
        {
            "id": f"metrics/{metric}",
            "filters": [
                metric_idx * nb_breakdowns + dim_idx
                for dim_idx in range(nb_breakdowns)
            ],
        }
        for metric_idx, metric in enumerate(metrics)
    ]

    return rep_desc


def get_node_values_from_response(response):
    """
    Extracting dimension values from a report response,
    and returning them into a dictionary of nodes: {name_itemId: value}
    For instance: {'daterangeday_1200201': 'Mar 1, 2020'}
    """

    name = response["columns"]["dimension"]["id"].split("/")[1]
    return {f"{name}_{row['itemId']}": row["value"] for row in response["rows"]}


def get_item_ids_from_nodes(list_of_strings):
    """
    Extracting item_ids from a list of nodes,
    each node being expressed as 'name_itemId'.

    Empty strings are skipped. The item id is taken after the LAST
    underscore, so that a dimension name which itself contains underscores
    does not yield a fragment of the name instead of the id.
    """

    return [node.rsplit("_", 1)[1] for node in list_of_strings if node]


def format_date(date_string):
    """
    Input: "Jan 1, 2020"
    Output: "2020-01-01"

    Raises ValueError if date_string does not match the input format.
    """
    return datetime.strptime(date_string, "%b %d, %Y").strftime("%Y-%m-%d")


def parse_response(response, metrics, parent_dim_parsed):
    """
    Parsing a raw JSON response into the following format:
    {dimension: value, metric: value} (1 dictionary per row)

    Generator: yields one dict per response row, made of the parent
    dimension values, the dimension of this response, and the row data
    zipped with the metric names. A "daterangeday" entry, when present,
    is reformatted with format_date.
    """

    dimension = response["columns"]["dimension"]["id"].split("variables/")[1]

    for row in response["rows"]:
        parsed_row = {
            **parent_dim_parsed,
            dimension: row["value"],
            **dict(zip(metrics, row["data"])),
        }
        if "daterangeday" in parsed_row:
            parsed_row["daterangeday"] = format_date(parsed_row["daterangeday"])
        yield parsed_row
"objective", - "pacing_type", - "promoted_object", - "recommendations", - "source_campaign", - "source_campaign_id", - "spend_cap", - "start_time", - "status", - "stop_time", - "topline_id", - "updated_time", +BREAKDOWNS = [ + v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__") ] -# should have done this list comprehension selection but -# some of the fields are obsolet and doesn't work, i took the most important -# ADS_POSSIBLE_VALUES = [v for k,v in AdSet.Field.__dict__.items() if not k.startswith("__")] -ADS_POSSIBLE_VALUES = [ - "account_id", - "adlabels", - "asset_feed_id", - "budget_remaining", - "campaign", - "campaign_id", - "configured_status", - "created_time", - "creative_sequence", - "daily_budget", - "end_time", - "lifetime_budget", - "lifetime_imps", - "lifetime_min_spend_target", - "lifetime_spend_cap", - "name", - "pacing_type", - "source_adset", - "source_adset_id", - "start_time", - "status", +ACTION_BREAKDOWNS = [ + v + for k, v in AdsInsights.ActionBreakdowns.__dict__.items() + if not k.startswith("__") ] -DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] - -DESIRED_FIELDS = { - "date_start": "date_start", - "date_stop": "date_stop", - "account_name": "account_name", - "account_id": "account_id", - "ad_id": "ad_id", - "ad_name": "ad_name", - "adset_id": "adset_id", - "adset_name": "adset_name", - "campaign_id": "campaign_id", - "campaign_name": "campaign_name", - "clicks": "clicks", - "link_clicks": "inline_link_clicks", - "outbound_clicks": ("outbound_clicks", "outbound_click"), - "impressions": "impressions", - "post_engagement": ("actions", "post_engagement"), - "purchases": ("actions", "omni_purchase"), - "website_purchases": ("actions", "offsite_conversion.fb_pixel_purchase"), - "purchases_conversion_value": ("action_values", "offsite_conversion.fb_pixel_purchase"), - "website_purchases_conversion_value": ("action_values", "omni_purchase"), - "website_purchase_roas": 
("website_purchase_roas", "offsite_conversion.fb_pixel_purchase"), - "objective": "objective", - "reach": "reach", - "spend": "spend", - "video_plays_3s": ("actions", "video_view"), - "video_plays": ("video_play_actions", "video_view"), - "video_plays_100p": ("video_p100_watched_actions", "video_view"), - "video_plays_95p": ("video_p95_watched_actions", "video_view"), - "video_plays_75p": ("video_p75_watched_actions", "video_view"), - "video_plays_50p": ("video_p50_watched_actions", "video_view"), - "video_plays_25p": ("video_p25_watched_actions", "video_view"), - "age": "age", - "gender": "gender", - "account_currency": "account_currency", - "frequency": "frequency", - "buying_type": "buying_type", - "video_p100_watched_actions": "video_p100_watched_actions", - "video_p75_watched_actions": "video_p75_watched_actions", - "video_p25_watched_actions": "video_p25_watched_actions", - "video_p50_watched_actions": "video_p50_watched_actions", - "video_thruplay_watched_actions": "video_thruplay_watched_actions", - "conversions": "conversions", - "status": "status", - "lifetime_budget": "lifetime_budget", - "budget_remaining": "budget_remaining", - "name": "name", - "id": "id", - "start_time": "start_time", - "stop_time": "end_time", - "daily_budget": "daily_budget", - "device_platform": "device_platform", - "platform_position": "platform_position", - "publisher_platform": "publisher_platform", - "impression_device": "impression_device", - "link_url_asset": {"value": "website_url"}, -} - - -def get_field_value(row, field): - if is_url_asset(field): - return extract_special_field(row, field) - return ( - row.get(DESIRED_FIELDS[field], None) - if isinstance(DESIRED_FIELDS[field], str) - else get_nested_field_value(row, field) - ) - - -def extract_special_field(row, field): - dic = DESIRED_FIELDS[field] - return row.get(field, {}).get(dic.get("value"), None) - - -def is_url_asset(field): - return field == "link_url_asset" - - -def get_nested_field_value(row, field): - if 
def get_action_breakdown_filters(field_path):
    """
    Extracts action breakdown filters from a field path,
    and returns them as a dictionary.

    For instance:
        'actions[action_type:video_view][action_type:post_engagement][action_device:iphone]'
    returns:
        {'action_type':['video_view','post_engagement'],
        'action_device':['iphone']}

    Path items without a colon are ignored. Only the first colon of an
    item separates the breakdown from its value, so a value may itself
    contain colons.
    """
    filters = {}
    for path_item in field_path:
        action_breakdown, separator, action_breakdown_value = path_item.partition(":")
        if separator:
            filters.setdefault(action_breakdown, []).append(action_breakdown_value)
    return filters


def format_field_path(field_path):
    """
    Formats a field_path back into a field.

    For instance:
        ['actions', 'action_type:post_engagement']
    returns:
        'actions[action_type:post_engagement]'

    A single-item path is returned as is. field_path must not be empty.
    """
    head, *tail = field_path
    return head + "".join(f"[{element}]" for element in tail)


def check_if_obj_meets_action_breakdown_filters(obj, filters):
    """
    Checks if a nested action breakdown object
    meets the conditions defined by action breakdown filters.

    For instance, if action breakdown filters are:
        {'action_type': ['post_engagement', 'video_view']
        'action_device': ['iphone']}
    Outputs will be:
        - {'action_type':'post_engagement', 'action_device':'iphone', 'value':12}: True
        - {'action_type':'video_view', 'action_device':'iphone', 'value':12}: True
        - {'action_type':'post_engagement', 'action_device':'desktop', 'value':12}: False

    Raises KeyError if obj lacks one of the filtered action breakdowns.
    """
    return all(
        obj[action_breakdown] in allowed_values
        for action_breakdown, allowed_values in filters.items()
    )


def get_action_breakdown_value(obj, visited, action_breakdowns):
    """
    Extracts the action breakdown value
    of a nested action breakdown object.

    For instance:
        {actions: [{'action_type':'video_view', 'action_device':'iphone', 'value':'12'}]}
        Here, the nested action_breakdown object is:
        {'action_type':'video_view', 'action_device':'iphone', 'value':'12'}
    returns:
        {'actions[action_type:video_view][action_device:iphone]': '12'}

    Only the action breakdowns actually present in obj take part in the key.
    obj must hold a 'value' entry.
    """
    obj_action_breakdown = [
        f"{action_breakdown}:{obj[action_breakdown]}"
        for action_breakdown in action_breakdowns
        if action_breakdown in obj
    ]
    return {format_field_path(visited + obj_action_breakdown): obj["value"]}


def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filters=None):
    """
    Extracts action breakdown values from a list of nested action breakdown objects,
    only if they meet the conditions defined by action breakdown filters.

    With no filters (None or an empty dict), every object is kept.
    """
    action_breakdown_values = {}
    for obj in resp_obj:
        if filters and not check_if_obj_meets_action_breakdown_filters(obj, filters):
            continue
        action_breakdown_values.update(
            get_action_breakdown_value(obj, visited, action_breakdowns)
        )
    return action_breakdown_values


def get_field_values(resp_obj, field_path, action_breakdowns, visited=None):
    """
    Recursive function extracting (and formatting) the values
    of a requested field from an API response and a field path.

    visited is the list of path items already walked through; top-level
    callers can leave it out. It is never mutated: each call works on its
    own copy, so consecutive calls cannot leak path items into one another.

    Returns a dictionary {formatted field: value}, or None when the path
    cannot be resolved or ends on a value that is neither a string nor a list.
    """
    path_item = field_path[0]
    visited = [*(visited or []), path_item]

    if path_item in resp_obj:
        value = resp_obj[path_item]
        if len(field_path) > 1:
            # Not at the end of the path yet: go one level deeper
            return get_field_values(value, field_path[1:], action_breakdowns, visited)
        if isinstance(value, str):
            return {format_field_path(visited): value}
        if isinstance(value, list):
            return get_all_action_breakdown_values(value, visited, action_breakdowns)
        return None

    if all(":" in f for f in field_path):
        # Remaining path items are all action breakdown filters: resp_obj is
        # the list of nested objects to filter, and the current item is a
        # filter rather than a key, hence visited[:-1]
        filters = get_action_breakdown_filters(field_path)
        return get_all_action_breakdown_values(
            resp_obj, visited[:-1], action_breakdowns, filters
        )
    return None
+ +*Example of Facebook Ad Insights Request* +``` +python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-breakdown age --facebook-breakdown gender --facebook-action-breakdown action_type --facebook-field ad_id --facebook-field ad_name --facebook-field impressions --facebook-field clicks --facebook-field actions[action_type:post_engagement] --facebook-field actions[action_type:video_view] --facebook-field age --facebook-field gender --facebook-time-increment 1 --facebook-start-date 2020-01-01 --facebook-end-date 2020-01-03 write_console +``` + +*Example of Facebook Object Node Request* +``` +python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-ad-insights False --facebook-level ad --facebook-field id --facebook-field creative[id] --facebook-add-date-to-report True --facebook-start-date 2020-01-01 --facebook-end-date 2020-01-03 write_console +``` -The following command retrieves some insights of every Ads in the Facebook account thanks to -a Facebook App whose access_token is . +#### Parameters +|CLI option|Documentation| +|:--|:--| +|`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| +|`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| +|`--facebook-access-token`|Facebook App Access Token.| +|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Supported values: creative (available only for Facebook Object Node requests), ad, adset, campaign, account (default).*| +|`--facebook-object-id`|ID of the root Facebook Object used to make the request.| +|`--facebook-level`|Granularity of the response. 
*Supported values: creative (available only for Facebook Object Node requests), ad (default), adset, campaign or account.*| +|`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Node* request.| +|`--facebook-field`|Fields to be retrieved.| +|`--facebook-start-date`|Start date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-end-date`|End date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start-date* and *--facebook-end-date* are specified. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-time-increment`|Cuts the results into smaller time slices within the specified time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-add-date-to-report`|*True* if you wish to add the date of the request to each response record, *False* otherwise (default).| +|`--facebook-breakdown`|How to break down the result. *This parameter is only relevant for Facebook Ad Insights Requests.*| +|`--facebook-action-breakdown`|How to break down action results. 
*This parameter is only relevant for Facebook Ad Insights Requests.*| + +#### Additional details for a relevant use of the Facebook Reader + +**#1: Make sure to select the appropriate `--facebook-level`** + +|If Facebook Object Type is...|Facebook Level can be...| +|:--|:--| +|`account`|account, campaign, adset, ad, creative| +|`campaign`|campaign, adset, ad| +|`adset`|adset, ad, creative| +|`ad`|ad, creative| +|`creative`|creative| + +**#2: Format Facebook Reader response using `--facebook-fields`** + +2.1. The list of **applicable fields** can be found on the links below: + +- **Facebook Ad Insights Request**: [all fields](https://developers.facebook.com/docs/marketing-api/insights/parameters/v7.0) +- **Facebook Object Node Request**: [Account-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-account), [Campaign-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign-group), [Adset-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign), [Ad-level fields](https://developers.facebook.com/docs/marketing-api/reference/adgroup), [Creative-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-creative) + +2.2. If you want to select **a nested field value**, simply indicate the path to this value within the request field. 
+ +*Facebook Reader Request* ``` -python nck/entrypoint.py read_facebook --facebook-access-token --facebook-ad-object-id --facebook-breakdown gender --facebook-level ad --facebook-start-date 2019-01-01 --facebook-end-date 2019-01-01 --facebook-field date_start --facebook-field date_stop --facebook-field account_currency --facebook-field account_id --facebook-field account_name --facebook-field ad_id --facebook-field ad_name --facebook-field adset_id --facebook-field adset_name --facebook-field campaign_id --facebook-field campaign_name --facebook-field clicks --facebook-field impressions --facebook-desired-field date_start --facebook-desired-field date_stop --facebook-desired-field account_name --facebook-desired-field account_id --facebook-desired-field ad_id --facebook-desired-field ad_namefacebook-desired-field clicks --facebook-desired-field impressions write_console +--facebook-field object_story_spec[video_data][call_to_action][value][link] ``` -The report below is the output of the command. You can easily store it in GCS or Biquery thanks to the corresponding -writers([GCS writer](../writers/gcs_writer.py), [BQ writer](../writers/bigquery_writer.py)): +*API Response* ``` -{ - "date_start": "2019-01-05", - "date_stop": "2019-01-05", - "account_name": "example_name" - "account_id": "0000000000" - "ad_id": "00000000000", - "ad_name": "example_name", - "clicks": "1", - "impressions": "100" +"object_story_spec": { + "video_data": { + "call_to_action": { + "type": "LEARN_MORE", + "value": { + "link": "https://www.artefact.com", + "link_format": "VIDEO_LPP" + } + } + } } ``` -See the [documentation here](https://developers.facebook.com/docs/marketing-api/insights/#marketing-api-quickstart "Create a Facebook App") -to create a Facebook App and an access token. 
-- Parameters of the Facebook Readers +*Facebook Reader Response* +``` +{"object_story_spec_video_data_call_to_action_value_link": "https://www.artefact.com"} +``` + +(2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. -| --facebook-app-id | --facebook-app-secret | --facebook-access-token | --facebook-ad-object-id | --facebook-ad-object-type | --facebook-breakdown | --facebook-action-breakdown | --facebook-ad-insights | --facebook-level | --facebook-time-increment | --facebook-field | --facebook-desired-field | --facebook-start-date | --facebook-end-date | --facebook-date-preset | --facebook-request-date -|:-----------------:|:---------------------:|:-----------------------:|:-----------------------:|:-------------------------:|:--------------------:|:---------------------------:|:----------------------:|:-------------------:|:-------------------------:|:----------------:|:------------------------:|:---------------------:|:-------------------:|:----------------------:|:----------------------:| -|Facebook App ID |Facebook App ID| Facebook App access token|Object ID to request (account ID, campaign ID, ...)|Object type (account, campaign, ...)|List of breakdowns for the request|List of action-breakdowns for the request|If set to true, request insights|Represents the granularity of result|Time increment|List of fields to request|Desired fields in the output report |Start date of period|End date of period|Preset period|If set to true, the date of the request will appear in the report +*Facebook Reader Request* +``` +--facebook-action-breakdown action_type +--facebook-field actions[action_type:video_view][action_type:post_engagement] +``` -See the documents below for a better understanding of the parameters: -- [Facebook API Insights 
documentation](https://developers.facebook.com/docs/marketing-api/insights) -- [API Reference for Ad Insights](https://developers.facebook.com/docs/marketing-api/reference/adgroup/insights/) -- [Available Fields for Nautilus](../helpers/facebook_helper.py) +*API Response* +``` +"actions": [ + { + "action_type": "video_view", + "value": "17" + }, + { + "action_type": "link_click", + "value": "8" + }, + { + "action_type": "post_engagement", + "value": "25" + }, + { + "action_type": "page_engagement", + "value": "12" + } +] +``` +*Facebook Reader Response* +``` +{"actions_action_type_video_view": "17", "actions_action_type_post_engagement": "25"} +``` ## Google Readers @@ -284,6 +360,83 @@ Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/ | `--yandex-date-start` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`. | | `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | +## Adobe Analytics Readers + +As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. + +### Adobe Analytics Reader 1.4 + +#### How to obtain credentials + +Our Adobe Analytics Reader 1.4 uses the **WSSE authentication framework**. This authentication framework is now deprecated, so you won't be able to generate new WSSE authentication credentials (Username, Password) on Adobe Developer Console if you don't already have them. 
+ +#### Quickstart + +Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe --adobe-username --adobe-password --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console +``` + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-username`|Username used for WSSE authentication| +|`--adobe-password`|Password used for WSSE authentication| +|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to *True*, the below parameters should be left empty.| +|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| +|`--adobe-report-metric-id`|ID of the metric to include in the report| +|`--adobe-date-granularity`|Granularity of the report. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS*| +|`--adobe-start-date`|Start date of the report (format: YYYY-MM-DD)| +|`--adobe-end-date`|End date of the report (format: YYYY-MM-DD)| + +#### Additional information +- **The full list of available elements and metrics** can be retrieved with the [GetElements](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetElements.md) and [GetMetrics](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetMetrics.md) methods. +- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. 
+- **If you need further information**, the documentation of Adobe APIs 1.4 can be found [here](https://github.com/AdobeDocs/analytics-1.4-apis). + +### Adobe Analytics Reader 2.0 + +#### How to obtain credentials + +Adobe Analytics Reader 2.0 uses the **JWT authentication framework**. +- Get developer access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) +- Create a Service Account integration to Adobe Analytics on [Adobe Developer Console](https://console.adobe.io/) +- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)). All these parameters will be passed to Adobe Analytics Reader 2.0. + +#### Quickstart + +Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-metric visits write_console +``` + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-client-id`|Client ID, that you can find on Adobe Developer Console| +|`--adobe-client-secret`|Client Secret, that you can find on Adobe Developer Console| +|`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| +|`--adobe-org-id`|Organization ID, that you can find on Adobe Developer Console| +|`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration. 
Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| +|`--adobe-2-0-global-company-id`|Global Company ID (to be requested from the [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| +|`--adobe-2-0-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-2-0-dimension`|Dimension to include in the report| +|`--adobe-2-0-metric`|Metric to include in the report| +|`--adobe-2-0-start-date`|Start date of the report (format: YYYY-MM-DD)| +|`--adobe-2-0-end-date`|End date of the report (format: YYYY-MM-DD)| + +#### Additional information + +- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allows you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. +- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-2-0-dimension daterangeday` will produce a report with a day granularity. +- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are listed in the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. +- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). + ### Troubleshooting You encountered and you don't know what 's going on.
You may find an answer in the troubleshooting guide below. diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 7d9993bb..669688d6 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -25,12 +25,13 @@ from nck.readers.oracle_reader import oracle from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce -from nck.readers.facebook_reader import facebook_marketing +from nck.readers.facebook_reader import facebook from nck.readers.dbm_reader import dbm from nck.readers.dcm_reader import dcm from nck.readers.ga_reader import ga from nck.readers.search_console_reader import search_console from nck.readers.adobe_reader import adobe +from nck.readers.adobe_reader_2_0 import adobe_2_0 from nck.readers.radarly_reader import radarly from nck.readers.yandex_campaign_reader import yandex_campaigns from nck.readers.yandex_statistics_reader import yandex_statistics @@ -43,13 +44,14 @@ google_ads, s3, sa360_reader, - facebook_marketing, + facebook, oracle, dbm, dcm, ga, search_console, adobe, + adobe_2_0, radarly, yandex_campaigns, yandex_statistics diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index 6c0c4b5b..a4bc5319 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -109,7 +109,12 @@ def build_report_description(self): def get_days_delta(self): days_range = self.kwargs.get("day_range") - delta_mapping = {"PREVIOUS_DAY": 1, "LAST_7_DAYS": 7, "LAST_30_DAYS": 30, "LAST_90_DAYS": 90} + delta_mapping = { + "PREVIOUS_DAY": 1, + "LAST_7_DAYS": 7, + "LAST_30_DAYS": 30, + "LAST_90_DAYS": 90, + } try: days_delta = delta_mapping[days_range] except KeyError: @@ -126,15 +131,21 @@ def set_date_range_report_desc(self, report_description): else: end_date = datetime.datetime.now().date() start_date = end_date - datetime.timedelta(days=self.get_days_delta()) - report_description["reportDescription"]["dateFrom"] = start_date.strftime("%Y-%m-%d") - 
report_description["reportDescription"]["dateTo"] = end_date.strftime("%Y-%m-%d") + report_description["reportDescription"]["dateFrom"] = start_date.strftime( + "%Y-%m-%d" + ) + report_description["reportDescription"]["dateTo"] = end_date.strftime( + "%Y-%m-%d" + ) def set_date_gran_report_desc(self, report_description): """ Adds the dateGranularity parameter to a reportDescription. """ if self.kwargs.get("date_granularity", None) is not None: - report_description["reportDescription"]["dateGranularity"] = self.kwargs.get("date_granularity") + report_description["reportDescription"][ + "dateGranularity" + ] = self.kwargs.get("date_granularity") @retry def query_report(self): @@ -145,7 +156,9 @@ def query_report(self): - Output: reportID, to be passed to the Report.Get method - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Queue.md """ - query_report = self.request(api="Report", method="Queue", data=self.build_report_description()) + query_report = self.request( + api="Report", method="Queue", data=self.build_report_description() + ) return query_report @retry @@ -157,7 +170,11 @@ def get_report(self, report_id, page_number=1): - Output: reportResponse containing the requested report data - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Get.md """ - request_f = lambda: self.request(api="Report", method="Get", data={"reportID": report_id, "page": page_number}) + request_f = lambda: self.request( + api="Report", + method="Get", + data={"reportID": report_id, "page": page_number}, + ) response = request_f() idx = 1 while response.get("error") == "report_not_ready": diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py new file mode 100644 index 00000000..8aa25ab0 --- /dev/null +++ b/nck/readers/adobe_reader_2_0.py @@ -0,0 +1,333 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# 
This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import logging +import click +import json +import requests +import time +from itertools import chain +from datetime import timedelta + +from nck.utils.retry import retry +from nck.utils.args import extract_args +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.clients.adobe_client import AdobeClient +from nck.streams.json_stream import JSONStream +from nck.helpers.adobe_helper_2_0 import ( + APIRateLimitError, + add_metric_container_to_report_description, + get_node_values_from_response, + get_item_ids_from_nodes, + parse_response, +) + +DATEFORMAT = "%Y-%m-%dT%H:%M:%S" +API_WINDOW_DURATION = 6 +API_REQUESTS_OVER_WINDOW_LIMIT = 12 + +logging.basicConfig(level="INFO") +logger = logging.getLogger() + + +def format_key_if_needed(ctx, param, value): + """ + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. 
+ """ + return value.replace("\\n", "\n") + + +@click.command(name="read_adobe_2_0") +@click.option( + "--adobe-2-0-client-id", + required=True, + help="Client ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-client-secret", + required=True, + help="Client Secret, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-tech-account-id", + required=True, + help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-org-id", + required=True, + help="Organization ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-2-0-private-key", + required=True, + callback=format_key_if_needed, + help="Content of the private.key file, that you had to provide to create the integration. " + "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", +) +@click.option( + "--adobe-2-0-global-company-id", + required=True, + help="Global Company ID, to be requested to Discovery API. " + "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", +) +@click.option("--adobe-2-0-report-suite-id", required=True, help="ID of the requested Adobe Report Suite") +@click.option( + "--adobe-2-0-dimension", + required=True, + multiple=True, + help="To get dimension names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. 
" + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option( + "--adobe-2-0-metric", + required=True, + multiple=True, + help="To get metric names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. " + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option("--adobe-2-0-start-date", required=True, type=click.DateTime(), help="Start date of the report") +@click.option("--adobe-2-0-end-date", required=True, type=click.DateTime(), help="End date of the report") +@processor( + "adobe_2_0_client_id", + "adobe_2_0_client_secret", + "adobe_2_0_tech_account_id", + "adobe_2_0_org_id", + "adobe_2_0_private_key", +) +def adobe_2_0(**kwargs): + return AdobeReader_2_0(**extract_args("adobe_2_0_", kwargs)) + + +class AdobeReader_2_0(Reader): + def __init__( + self, + client_id, + client_secret, + tech_account_id, + org_id, + private_key, + global_company_id, + report_suite_id, + dimension, + metric, + start_date, + end_date, + ): + self.adobe_client = AdobeClient(client_id, client_secret, tech_account_id, org_id, private_key) + self.global_company_id = global_company_id + self.report_suite_id = report_suite_id + self.dimensions = list(dimension) + self.metrics = list(metric) + self.start_date = start_date + self.end_date = end_date + timedelta(days=1) + self.ingestion_tracker = [] + self.node_values = {} + + def build_date_range(self): + return f"{self.start_date.strftime(DATEFORMAT)}/{self.end_date.strftime(DATEFORMAT)}" + + def build_report_description(self, metrics, breakdown_item_ids=[]): + """ + Building a report description, to be passed as a parameter to the Reporting API. 
+ Documentation: + - https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-multiple-breakdowns.md + - https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md + """ + + rep_desc = { + "rsid": self.report_suite_id, + "globalFilters": [{"type": "dateRange", "dateRange": self.build_date_range()}], + "metricContainer": {}, + "dimension": f"variables/{self.dimensions[len(breakdown_item_ids)]}", + "settings": {"countRepeatInstances": "true", "limit": "5000"}, + } + + rep_desc = add_metric_container_to_report_description( + rep_desc=rep_desc, dimensions=self.dimensions, metrics=metrics, breakdown_item_ids=breakdown_item_ids + ) + + return rep_desc + + def throttle(self): + """ + Monitoring API rate limit (12 requests every 6 seconds). + """ + + current_time = time.time() + self.ingestion_tracker.append(current_time) + window_ingestion_tracker = [t for t in self.ingestion_tracker if t >= (current_time - API_WINDOW_DURATION)] + + if len(window_ingestion_tracker) >= API_REQUESTS_OVER_WINDOW_LIMIT: + sleep_time = window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time + logging.warning(f"Throttling activated: sleeping for {sleep_time} seconds...") + time.sleep(sleep_time) + + @retry + def get_report_page(self, rep_desc, page_nb=0): + """ + Getting a single report page, and returning it into a raw JSON format. 
+ """ + + self.throttle() + rep_desc["settings"]["page"] = page_nb + + response = requests.post( + f"https://analytics.adobe.io/api/{self.global_company_id}/reports", + headers=self.adobe_client.build_request_headers(self.global_company_id), + data=json.dumps(rep_desc), + ).json() + + if response.get("message") == "Too many requests": + raise APIRateLimitError("API rate limit was exceeded.") + + return response + + def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): + """ + Iterating over report pages, parsing them, and returning a list of iterators, + containing dictonnary-formatted records: {dimension: value, metric: value} + + The parent_dim_parsed argument (a dictionnary: {dimension: value}) + should be passed if the request includes multiple dimension breakdowns, + so that we can add their values to output records. + """ + + report_info = { + "parent_dim": parent_dim_parsed, + "dim": rep_desc["dimension"].split("variables/")[1], + "metrics": metrics, + } + logging.info(f"Getting report: {report_info}") + + first_response = self.get_report_page(rep_desc) + all_responses = [parse_response(first_response, metrics, parent_dim_parsed)] + + if first_response["totalPages"] > 1: + for page_nb in range(1, first_response["totalPages"]): + next_response = self.get_report_page(rep_desc, page_nb) + all_responses += [parse_response(next_response, metrics, parent_dim_parsed)] + + return chain(*all_responses) + + def get_node_values(self, breakdown_item_ids): + """ + Extracting dimension values from a full report response (all pages), + and returning them into a dictionnary of nodes: {name_itemId: value} + For instance: {'daterangeday_1200001': 'Jan 1, 2020'} + """ + + rep_desc = self.build_report_description(metrics=["visits"], breakdown_item_ids=breakdown_item_ids) + first_response = self.get_report_page(rep_desc) + node_values = get_node_values_from_response(first_response) + + if first_response["totalPages"] > 1: + for page_nb in range(1, 
first_response["totalPages"]): + next_node_values = get_node_values_from_response(self.get_report_page(rep_desc, page_nb)) + node_values.update(next_node_values) + + return node_values + + def add_child_nodes_to_graph(self, graph, node, path_to_node): + """ + Adding child nodes to Adobe graph, at two levels: + parent_node: [child_node_0, child_node_1, child_node_2] + child_node_0: [] + child_node_1: [] + child_node_2: [] + """ + + logging.info(f"Adding child nodes of '{node}' to graph.") + + breakdown_item_ids = get_item_ids_from_nodes(path_to_node) + child_node_values = self.get_node_values(breakdown_item_ids) + self.node_values.update(child_node_values) + + graph[node] = [n for n in child_node_values] + for n in child_node_values: + graph[n] = [] + + return graph + + def result_generator(self, data): + yield from data + + def read_one_dimension(self): + """ + If the requests includes only one dimension, it can be made straight away. + """ + + rep_desc = self.build_report_description(self.metrics) + data = self.get_parsed_report(rep_desc, self.metrics) + yield from self.result_generator(data) + + def read_through_graph(self, graph=None, node=None): + """ + If the request includes more than one dimension, it can be made + by exploring Adobe graph with a DFS (Deep-First-Search) algorithm. 
+ """ + + global visited + global path_to_node + + if not graph: + # Create graph and add first level of nodes + graph, node, path_to_node, visited = {}, "root", [], [] + graph = self.add_child_nodes_to_graph(graph, node, path_to_node) + + else: + # If remaining node children to explore: add node children to graph + if len(path_to_node) < len(self.dimensions) - 1: + graph = self.add_child_nodes_to_graph(graph, node, path_to_node) + + # If no remaining node children to explore: get report + if len(path_to_node) == len(self.dimensions) - 1: + parent_dim_parsed = {node.split("_")[0]: self.node_values[node] for node in path_to_node} + breakdown_item_ids = get_item_ids_from_nodes(path_to_node) + rep_desc = self.build_report_description(self.metrics, breakdown_item_ids) + data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed) + yield from self.result_generator(data) + + # Add node to visited + if node not in visited: + visited.append(node) + + # Update unvisited_childs + unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited] + + # Read through child node children + for child_node in unvisited_childs: + path_to_node.append(child_node) + yield from self.read_through_graph(graph=graph, node=child_node) + path_to_node.remove(child_node) + + # Remove local_root_node children from visited + if path_to_node != []: + local_root_node = path_to_node[-1] + visited = [n for n in visited if n not in graph[local_root_node]] + + def read(self): + + if len(self.dimensions) == 1: + yield JSONStream("results_" + self.report_suite_id, self.read_one_dimension()) + elif len(self.dimensions) > 1: + yield JSONStream("results_" + self.report_suite_id, self.read_through_graph()) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 3e0f4fc3..b8501d29 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -15,10 +15,11 @@ # You should have received a copy of the GNU Lesser General 
Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + import logging import click -from itertools import chain +import re from click import ClickException from datetime import datetime @@ -28,51 +29,70 @@ from nck.utils.retry import retry from nck.streams.normalized_json_stream import NormalizedJSONStream from nck.helpers.facebook_helper import ( - AD_OBJECT_TYPES, - BREAKDOWNS_POSSIBLE_VALUES, - LEVELS_POSSIBLE_VALUES, + FACEBOOK_OBJECTS, DATE_PRESETS, - DESIRED_FIELDS, - get_field_value, + BREAKDOWNS, + ACTION_BREAKDOWNS, + get_action_breakdown_filters, + get_field_values, ) from facebook_business.api import FacebookAdsApi from facebook_business.adobjects.adaccount import AdAccount -from facebook_business.adobjects.adset import AdSet from facebook_business.adobjects.campaign import Campaign +from facebook_business.adobjects.adset import AdSet +from facebook_business.adobjects.ad import Ad +from facebook_business.adobjects.adcreative import AdCreative DATEFORMAT = "%Y-%m-%d" +OBJECT_CREATION_MAPPING = { + "account": AdAccount, + "campaign": Campaign, + "adset": AdSet, + "ad": Ad, + "creative": AdCreative, +} + +EDGE_MAPPING = { + "account": ["campaign", "adset", "ad", "creative"], + "campaign": ["adset", "ad"], + "adset": ["ad", "creative"], + "ad": ["creative"], +} + def check_object_id(ctx, param, values): try: [int(value) for value in values] return values except ValueError: - raise ClickException("Wrong format. Account ID should only contains digits") + raise ClickException("Wrong format. 
Ad object IDs should only contains digits.") @click.command(name="read_facebook") -@click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") @click.option( - "--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided" + "--facebook-app-id", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", +) +@click.option( + "--facebook-app-secret", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", ) @click.option("--facebook-access-token", required=True) -@click.option("--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id) -@click.option("--facebook-ad-object-type", type=click.Choice(AD_OBJECT_TYPES), default=AD_OBJECT_TYPES[0]) @click.option( - "--facebook-breakdown", - multiple=True, - type=click.Choice(BREAKDOWNS_POSSIBLE_VALUES), - help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns/", + "--facebook-object-id", required=True, multiple=True, callback=check_object_id ) -# At this time, the Facebook connector only handle the action-breakdown "action_type" @click.option( - "--facebook-action-breakdown", - multiple=True, - type=click.Choice("action_type"), - default=["action_type"], - help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", + "--facebook-object-type", type=click.Choice(FACEBOOK_OBJECTS), default="account" +) +@click.option( + "--facebook-level", + type=click.Choice(FACEBOOK_OBJECTS), + default="ad", + help="Granularity of result", ) @click.option( "--facebook-ad-insights", @@ -81,20 +101,21 @@ def check_object_id(ctx, param, values): help="https://developers.facebook.com/docs/marketing-api/insights", ) @click.option( - "--facebook-level", - type=click.Choice(LEVELS_POSSIBLE_VALUES), - default=LEVELS_POSSIBLE_VALUES[0], - help="Represents the granularity of result", + 
"--facebook-breakdown", + multiple=True, + type=click.Choice(BREAKDOWNS), + help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns/", ) -@click.option("--facebook-time-increment") -@click.option("--facebook-field", multiple=True, help="Facebook API fields for the request") @click.option( - "--facebook-desired-field", + "--facebook-action-breakdown", multiple=True, - type=click.Choice(list(DESIRED_FIELDS.keys())), - help="Desired fields to get in the output report." - "https://developers.facebook.com/docs/marketing-api/insights/parameters/v5.0#fields", + type=click.Choice(ACTION_BREAKDOWNS), + help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", ) +@click.option( + "--facebook-field", multiple=True, help="API fields, following Artefact format" +) +@click.option("--facebook-time-increment") @click.option("--facebook-start-date", type=click.DateTime()) @click.option("--facebook-end-date", type=click.DateTime()) @click.option("--facebook-date-preset", type=click.Choice(DATE_PRESETS)) @@ -105,29 +126,24 @@ def check_object_id(ctx, param, values): help="If set to true, the date of the request will appear in the report", ) @processor("facebook_app_secret", "facebook_access_token") -def facebook_marketing(**kwargs): - # Should add later all the check restrictions on fields/parameters/breakdowns of the API following the value of - # object type, see more on : - # ---https://developers.facebook.com/docs/marketing-api/insights/breakdowns - # ---https://developers.facebook.com/docs/marketing-api/insights - return FacebookMarketingReader(**extract_args("facebook_", kwargs)) +def facebook(**kwargs): + return FacebookReader(**extract_args("facebook_", kwargs)) -class FacebookMarketingReader(Reader): +class FacebookReader(Reader): def __init__( self, app_id, app_secret, access_token, - ad_object_id, - ad_object_type, + object_id, + object_type, + level, + ad_insights, breakdown, action_breakdown, - ad_insights, - 
level, - time_increment, field, - desired_field, + time_increment, start_date, end_date, date_preset, @@ -136,125 +152,225 @@ def __init__( self.app_id = app_id self.app_secret = app_secret self.access_token = access_token - self.ad_object_ids = ad_object_id - self.ad_object_type = ad_object_type + + self.object_ids = object_id + self.object_type = object_type + self.level = level + + self.ad_insights = ad_insights self.breakdowns = list(breakdown) self.action_breakdowns = list(action_breakdown) - self.ad_insights = ad_insights - self.level = level - self.time_increment = time_increment or False self.fields = list(field) - self.desired_fields = list(desired_field) + self._field_paths = [re.split(r"[\]\[]+", f.strip("]")) for f in self.fields] + self._api_fields = list( + {f[0] for f in self._field_paths if f[0] not in self.breakdowns} + ) + + self.time_increment = time_increment or False self.start_date = start_date self.end_date = end_date self.date_preset = date_preset self.add_date_to_report = add_date_to_report - @retry - def run_query_on_fb_account_obj(self, params, ad_object_id): - account = AdAccount("act_" + ad_object_id) - for el in account.get_insights(params=params): - yield el + # Check input parameters - @retry - def run_query_on_fb_account_obj_conf(self, params, ad_object_id): - if ad_object_id.startswith("act_"): - raise ClickException("Wrong format. 
Account ID should only contains digits") - account = AdAccount("act_" + ad_object_id) - campaigns = account.get_campaigns() - for el in chain( - *[self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) for campaign in campaigns] + if (self.level != self.object_type) and ( + self.level not in EDGE_MAPPING[self.object_type] ): - yield el - - @retry - def run_query_on_fb_campaign_obj_conf(self, params, ad_object_id): - campaign = Campaign(ad_object_id) - if self.level == LEVELS_POSSIBLE_VALUES[2]: - val_cmp = campaign.api_get(fields=self.desired_fields, params=params) - yield val_cmp - - elif self.level == LEVELS_POSSIBLE_VALUES[1]: - for el in chain( - *[self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) for adset in campaign.get_ad_sets()] - ): - yield el - else: raise ClickException( - "Received level: " + self.level + ". Available levels are " + repr(LEVELS_POSSIBLE_VALUES[1:3]) + f"Wrong query. Asked level ({self.level}) is not compatible with object type ({self.object_type}).\ + Please choose level from: {[self.object_type] + EDGE_MAPPING[self.object_type]}" ) - @retry - def run_query_on_fb_adset_obj_conf(self, params, ad_object_id, level): - adset = AdSet(ad_object_id) - if level == LEVELS_POSSIBLE_VALUES[1]: - val_adset = adset.api_get(fields=self.desired_fields, params=params) - yield val_adset + if self.ad_insights: + + if self.level == "creative" or self.object_type == "creative": + raise ClickException( + f"Wrong query. The 'creative' level is not available in AdInsights queries.\ + Accepted levels: {FACEBOOK_OBJECTS[1:]}" + ) + + missing_breakdowns = { + f[0] + for f in self._field_paths + if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns) + } + if missing_breakdowns != set(): + raise ClickException( + f"Wrong query. 
Please add to Breakdowns: {missing_breakdowns}" + ) + + missing_action_breakdowns = { + flt + for f in self._field_paths + for flt in get_action_breakdown_filters(f) + if flt not in self.action_breakdowns + } + if missing_action_breakdowns != set(): + raise ClickException( + f"Wrong query. Please add to Action Breakdowns: {missing_action_breakdowns}" + ) + else: - raise ClickException("Adset setup is available at 'adset' level. Received level: " + self.level) + + if self.breakdowns != [] or self.action_breakdowns != []: + raise ClickException( + "Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns." + ) + + if self.time_increment: + raise ClickException( + "Wrong query. Facebook Object Node queries do not accept the time_increment parameter." + ) def get_params(self): - params = { - "action_breakdowns": self.action_breakdowns, - "fields": self.fields, - "breakdowns": self.breakdowns, - "level": self.level, - } - self.add_period_to_parameters(params) + """ + Build the request parameters that will be sent to the API: + - If AdInsights query: breakdown, action_breakdowns, level, time_range and date_preset + - If Facebook Object Node query at the campaign, adset or ad level: time_range and date_preset + """ + params = {} + + if self.ad_insights: + + params["breakdowns"] = self.breakdowns + params["action_breakdowns"] = self.action_breakdowns + params["level"] = self.level + self.add_period_to_params(params) + + else: + if self.level in ["campaign", "adset", "ad"]: + self.add_period_to_params(params) + return params - def add_period_to_parameters(self, params): - if self.time_increment: + def add_period_to_params(self, params): + """ + Add the time_increment, time_range and/or date_preset keys to parameters. 
+ - time_increment: available in AdInsights queries + - time_range and date_preset: available in AdInsights queries, + and in Facebook Object Node queries at the campaign, adset or ad levels only + """ + if self.ad_insights and self.time_increment: params["time_increment"] = self.time_increment - if self.start_date and self.end_date: - logging.info("Date format used for request : start_date and end_date") - params["time_range"] = self.create_time_range(self.start_date, self.end_date) - elif self.date_preset: - logging.info("Date format used for request : date_preset") - params["date_preset"] = self.date_preset - else: - logging.warning("No date range provided - Last 30 days by default") - logging.warning( - "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters" - ) - @staticmethod - def create_time_range(start_date, end_date): - return {"since": start_date.strftime(DATEFORMAT), "until": end_date.strftime(DATEFORMAT)} + if self.ad_insights or self.level in ["campaign", "adset", "ad"]: + if self.start_date and self.end_date: + logging.info("Date format used for request: start_date and end_date") + params["time_range"] = self.create_time_range() + elif self.date_preset: + logging.info("Date format used for request: date_preset") + params["date_preset"] = self.date_preset + else: + logging.warning("No date range provided - Last 30 days by default") + logging.warning( + "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters" + ) + + def create_time_range(self): + return { + "since": self.start_date.strftime(DATEFORMAT), + "until": self.end_date.strftime(DATEFORMAT), + } + + def create_object(self, object_id): + """ + Create a Facebook object based on the provided object_type and object_id. 
+ """ + if self.object_type == "account": + object_id = "act_" + object_id + obj = OBJECT_CREATION_MAPPING[self.object_type](object_id) + + return obj + + @retry + def query_ad_insights(self, fields, params, object_id): + """ + AdInsights documentation: + https://developers.facebook.com/docs/marketing-api/insights + """ + # Step 1 - Create Facebook object + obj = self.create_object(object_id) + + # Step 2 - Run AdInsights query on Facebook object + for element in obj.get_insights(fields=fields, params=params): + yield element + + @retry + def query_object_node(self, fields, params, object_id): + """ + Supported Facebook Object Nodes: AdAccount, Campaign, AdSet, Ad and AdCreative + Documentation: https://developers.facebook.com/docs/marketing-api/reference/ + """ + # Step 1 - Create Facebook object + obj = self.create_object(object_id) + + # Step 2 - Run Facebook Object Node query on the Facebook object itself, + # or on one of its edges (depending on the specified level) + if self.level == self.object_type: + yield obj.api_get(fields=fields, params=params) + else: + EDGE_QUERY_MAPPING = { + "campaign": obj.get_campaigns(), + "adset": obj.get_ad_sets(), + "ad": obj.get_ads(), + "creative": obj.get_ad_creatives(), + } + edge_objs = EDGE_QUERY_MAPPING[self.level] + for element in [ + edge_obj.api_get(fields=fields, params=params) for edge_obj in edge_objs + ]: + yield element def format_and_yield(self, record): - report = {field: get_field_value(record, field) for field in self.desired_fields} + """ + Parse a single record into an {item: value} dictionnary. + """ + report = {} + + for field_path in self._field_paths: + field_values = get_field_values( + record, field_path, self.action_breakdowns, visited=[] + ) + if field_values: + report.update(field_values) + if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) + yield report def result_generator(self, data): + """ + Parse all records into an {item: value} dictionnary. 
+ """ for record in data: yield from self.format_and_yield(record.export_all_data()) - def get_data(self): - for object_id in self.ad_object_ids: - yield from self.get_data_for_object(object_id) - - def get_data_for_object(self, ad_object_id): + def get_data_for_object(self, object_id): + """ + Run an API query (AdInsights or Facebook Object Node) on a single object_id. + """ params = self.get_params() + if self.ad_insights: - query_mapping = {AD_OBJECT_TYPES[0]: self.run_query_on_fb_account_obj} + data = self.query_ad_insights(self._api_fields, params, object_id) else: - query_mapping = { - AD_OBJECT_TYPES[0]: self.run_query_on_fb_account_obj_conf, - AD_OBJECT_TYPES[1]: self.run_query_on_fb_campaign_obj_conf, - AD_OBJECT_TYPES[2]: self.run_query_on_fb_adset_obj_conf, - } - try: - query = query_mapping[self.ad_object_type] - data = query(params, ad_object_id) - except KeyError: - raise ClickException("`{}` is not a valid adObject type".format(self.ad_object_type)) + data = self.query_object_node(self._api_fields, params, object_id) + yield from self.result_generator(data) + def get_data(self): + """ + Run API queries on all object_ids. 
+ """ + for object_id in self.object_ids: + yield from self.get_data_for_object(object_id) + def read(self): + FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) yield NormalizedJSONStream( - "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), self.get_data() + "results_" + self.object_type + "_" + "_".join(self.object_ids), + self.get_data(), ) diff --git a/nck/streams/normalized_json_stream.py b/nck/streams/normalized_json_stream.py index 6dfa89e6..164b5e83 100644 --- a/nck/streams/normalized_json_stream.py +++ b/nck/streams/normalized_json_stream.py @@ -47,4 +47,9 @@ def _normalize_key(key): .replace(":", "_") .replace("/", "_") .replace("\\", "_") + .replace("][", "_") + .replace("[", "_") + .replace("]", "_") + .replace(".", "_") + .strip("_") ) diff --git a/requirements.txt b/requirements.txt index 9c7782e3..e0bacf59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,4 +60,6 @@ Unidecode==1.1.1 uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 -googleads==22.0.0 \ No newline at end of file +googleads==22.0.0 +pyjwt==1.7.1 +cryptography==2.9 \ No newline at end of file diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py new file mode 100644 index 00000000..e953227a --- /dev/null +++ b/tests/readers/test_adobe_reader_2_0.py @@ -0,0 +1,341 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from nck.readers.adobe_reader_2_0 import AdobeReader_2_0 +from unittest import TestCase, mock + +import datetime + + +class AdobeReaderTest_2_0(TestCase): + + kwargs = { + "client_id": "", + "client_secret": "", + "tech_account_id": "", + "org_id": "", + "private_key": "", + "global_company_id": "", + "report_suite_id": "XXXXXXXXX", + "dimension": [], + "metric": [], + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2020, 1, 2), + } + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_date_range(self, mock_adobe_client): + output = AdobeReader_2_0(**self.kwargs).build_date_range() + expected = "2020-01-01T00:00:00/2020-01-03T00:00:00" + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_report_description_one_dimension(self, mock_adobe_client): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"dimension": ["daterangeday"]}) + metrics = ["visits", "bounces"] + + output = AdobeReader_2_0(**temp_kwargs).build_report_description(metrics) + expected = { + "rsid": "XXXXXXXXX", + "globalFilters": [ + { + "type": "dateRange", + "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", + } + ], + "metricContainer": { + "metricFilters": [], + "metrics": [ + {"id": "metrics/visits", "filters": []}, + {"id": "metrics/bounces", "filters": []}, + ], + }, + "dimension": "variables/daterangeday", + "settings": {"countRepeatInstances": "true", "limit": "5000"}, + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_report_description_multiple_dimensions(self, mock_adobe_client): + temp_kwargs = self.kwargs.copy() + 
temp_kwargs.update({"dimension": ["daterangeday", "campaign", "pagename"]}) + metrics = ["visits", "bounces"] + breakdown_item_ids = ["000000000", "111111111"] + + output = AdobeReader_2_0(**temp_kwargs).build_report_description( + metrics, breakdown_item_ids + ) + expected = { + "rsid": "XXXXXXXXX", + "globalFilters": [ + { + "type": "dateRange", + "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", + } + ], + "metricContainer": { + "metricFilters": [ + { + "id": 0, + "type": "breakdown", + "dimension": "variables/daterangeday", + "itemId": "000000000", + }, + { + "id": 1, + "type": "breakdown", + "dimension": "variables/campaign", + "itemId": "111111111", + }, + { + "id": 2, + "type": "breakdown", + "dimension": "variables/daterangeday", + "itemId": "000000000", + }, + { + "id": 3, + "type": "breakdown", + "dimension": "variables/campaign", + "itemId": "111111111", + }, + ], + "metrics": [ + {"id": "metrics/visits", "filters": [0, 1]}, + {"id": "metrics/bounces", "filters": [2, 3]}, + ], + }, + "dimension": "variables/pagename", + "settings": {"countRepeatInstances": "true", "limit": "5000"}, + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_report_page", + side_effect=[ + { + "totalPages": 2, + "firstPage": True, + "lastPage": False, + "columns": {"dimension": {"id": "variables/daterangeday"}}, + "rows": [ + {"itemId": "1200201", "value": "Jan 1, 2020", "data": [11, 21]}, + {"itemId": "1200202", "value": "Jan 2, 2020", "data": [12, 22]}, + ], + }, + { + "totalPages": 2, + "firstPage": False, + "lastPage": True, + "columns": {"dimension": {"id": "variables/daterangeday"}}, + "rows": [ + {"itemId": "1200203", "value": "Jan 3, 2020", "data": [13, 23]}, + {"itemId": "1200204", "value": "Jan 4, 2020", "data": [14, 24]}, + ], + }, + ], + ) + def test_get_parsed_report(self, mock_adobe_client, mock_get_report_page): + 
temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "dimension": ["daterangeday"], + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2020, 1, 4), + } + ) + metrics = ["visits", "bounces"] + + output = AdobeReader_2_0(**temp_kwargs).get_parsed_report( + {"dimension": "variables/daterangeday"}, metrics + ) + expected = [ + {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + {"daterangeday": "2020-01-03", "visits": 13, "bounces": 23}, + {"daterangeday": "2020-01-04", "visits": 14, "bounces": 24}, + ] + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_node_values", + return_value={ + "lasttouchchannel_1": "Paid Search", + "lasttouchchannel_2": "Natural_Search", + }, + ) + def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values): + graph = { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": [], + "daterangeday_1200202": [], + } + node = "daterangeday_1200201" + path_to_node = ["daterangeday_1200201"] + + output = AdobeReader_2_0(**self.kwargs).add_child_nodes_to_graph( + graph, node, path_to_node + ) + expected = { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1", "lasttouchchannel_2"], + "daterangeday_1200202": [], + "lasttouchchannel_1": [], + "lasttouchchannel_2": [], + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + return_value=[ + {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + ], + ) + def 
test_read_one_dimension_reports( + self, mock_adobe_client, mock_get_parsed_report + ): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + {"dimension": ["daterangeday"], "metric": ["visits", "bounces"]} + ) + + output = next(AdobeReader_2_0(**temp_kwargs).read()) + expected = [ + {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + ] + for output_record, expected_output in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_output) + + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.add_child_nodes_to_graph", + side_effect=[ + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": [], + "daterangeday_1200202": [], + }, + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1"], + "daterangeday_1200202": [], + "lasttouchchannel_1": [], + }, + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1"], + "daterangeday_1200202": ["lasttouchchannel_2"], + "lasttouchchannel_1": [], + "lasttouchchannel_2": [], + }, + ], + ) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + side_effect=[ + [ + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_1", + "visits": 11, + "bounces": 21, + }, + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_2", + "visits": 12, + "bounces": 22, + }, + ], + [ + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_1", + "visits": 13, + "bounces": 23, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_2", + "visits": 14, + "bounces": 24, + }, + ], + ], + ) + def 
test_read_multiple_dimension_reports( + self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report + ): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "dimension": ["daterangeday", "lastouchchannel", "campaign"], + "metric": ["visits", "bounces"], + } + ) + reader = AdobeReader_2_0(**temp_kwargs) + reader.node_values = { + "daterangeday_1200201": "Jan 1, 2020", + "daterangeday_1200202": "Jan 2, 2020", + "lasttouchchannel_1": "Paid Search", + "lasttouchchannel_2": "Natural Search", + } + output = next(reader.read()) + expected = [ + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_1", + "visits": 11, + "bounces": 21, + }, + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_2", + "visits": 12, + "bounces": 22, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_1", + "visits": 13, + "bounces": 23, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_2", + "visits": 14, + "bounces": 24, + }, + ] + for output_record, expected_record in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_record) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index cd96685b..f2b4a282 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -15,88 +15,234 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ from unittest import TestCase, mock -from freezegun import freeze_time +from parameterized import parameterized +from click import ClickException -from nck.readers.facebook_reader import FacebookMarketingReader +from nck.readers.facebook_reader import FacebookReader from facebook_business.api import FacebookAdsApi from facebook_business.adobjects.adsinsights import AdsInsights +from facebook_business.adobjects.ad import Ad class FacebookReaderTest(TestCase): - DATEFORMAT = "%Y-%m-%d" - def mock_facebook_reader(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) + DATEFORMAT = "%Y-%m-%d" kwargs = { - "ad_insights": True, "app_id": "", "app_secret": "", - "access_token": "", - "ad_object_ids": "123456789", - "recurse_level": 0, - "ad_object_type": "adaccount", - "desired_fields": ["date_start", "impressions"], + "access_token": "123456789", + "object_id": ["123456789"], + "object_type": "account", + "level": "ad", + "ad_insights": True, + "breakdown": [], + "action_breakdown": [], + "field": [], + "time_increment": None, + "start_date": None, + "end_date": None, + "date_preset": None, "add_date_to_report": False, } - @mock.patch("nck.readers.facebook_reader.FacebookMarketingReader.run_query_on_fb_account_obj") - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @mock.patch.object(FacebookMarketingReader, "get_params", lambda *args: None) - @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) - def test_read_empty_data(self, mock_query): - reader = FacebookMarketingReader(**self.kwargs) - mock_query.return_value = [] - if len(list(reader.read())) > 1: - assert False, "Data is not empty" - - @mock.patch("nck.readers.facebook_reader.FacebookMarketingReader.run_query_on_fb_account_obj") - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @mock.patch.object(FacebookMarketingReader, "get_params", lambda *args: None) + @parameterized.expand( + [ + ("incompatible_level", 
{"object_type": "ad", "level": "account"}), + ( + "missing_breakdown", + {"ad_insights": True, "field": ["age"], "breakdown": []}, + ), + ( + "missing_action_breakdown", + { + "ad_insights": True, + "field": ["actions[action_type:link_click]"], + "action_breakdown": [], + }, + ), + ( + "creative_level_for_adinsights_query", + {"ad_insights": True, "object_type": "creative", "level": "creative"}, + ), + ( + "breakdown_for_object_node_query", + {"ad_insights": False, "breakdown": ["age"]}, + ), + ( + "action_breakdown_for_object_node_query", + {"ad_insights": False, "action_breakdown": ["action_type"]}, + ), + ( + "time_increment_for_object_node_query", + {"ad_insights": False, "time_increment": "1"}, + ), + ] + ) + def test_refuse_invalid_input(self, name, parameters): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update(parameters) + with self.assertRaises(ClickException): + FacebookReader(**temp_kwargs) + + def test_get_api_fields(self): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "field": [ + "impressions", + "link_url_asset[website_url]", + "actions[action_type:link_click]", + ], + "breakdown": ["link_url_asset"], + "action_breakdown": ["action_type"], + } + ) + expected = ["impressions", "actions"] + self.assertEqual(set(FacebookReader(**temp_kwargs)._api_fields), set(expected)) + + def test_get_field_paths(self): + + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "field": [ + "impressions", + "link_url_asset[website_url]", + "actions[action_type:link_click]", + ], + "breakdown": ["link_url_asset"], + "action_breakdown": ["action_type"], + } + ) + expected = [ + ["impressions"], + ["link_url_asset", "website_url"], + ["actions", "action_type:link_click"], + ] + self.assertEqual(FacebookReader(**temp_kwargs)._field_paths, expected) + + @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_insights") + @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @mock.patch.object(FacebookAdsApi, "init", lambda 
*args: None) - def test_read_data(self, mock_query): - reader = FacebookMarketingReader(**self.kwargs) + def test_read_with_ad_insights_query(self, mock_query_ad_insights): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + {"ad_insights": True, "field": ["date_start", "impressions"]} + ) + row1, row2 = AdsInsights(), AdsInsights() - row1.set_data({"date_start": "2019-01-01", "impressions": "1"}) - row2.set_data({"date_start": "2019-01-01", "impressions": "2"}) - mock_query.return_value = [row1, row2] - - expected = [{"date_start": "2019-01-01", "impressions": "1"}, {"date_start": "2019-01-01", "impressions": "2"}] - - data = next(reader.read()) - assert len(list(data.readlines())) != 0 - data = next(reader.read()) - for record, output in zip(data.readlines(), iter(expected)): - assert record == output - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_standard_field(self): - kwargs = {"desired_fields": ["clicks", "gender", "impressions"], "add_date_to_report": False} - record = {"clicks": "0", "date_start": "2020-01-01", "gender": "unknown", "impressions": "300"} - expected = {"clicks": "0", "gender": "unknown", "impressions": "300"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_nested_field(self): - kwargs = {"desired_fields": ["outbound_clicks"], "add_date_to_report": False} - record = {"outbound_clicks": [{"action_type": "outbound_click", "value": "1"}]} - expected = {"outbound_clicks": "1"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_field_not_in_report(self): - kwargs = {"desired_fields": ["age", "outbound_clicks"], "add_date_to_report": False} - record = {"gender": "unknown"} - expected = {"age": None, "outbound_clicks": 
None} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @freeze_time("2020-01-01") - def test_add_date_to_report_in_report(self): - kwargs = {"desired_fields": ["clicks"], "add_date_to_report": True} - record = {"clicks": "0", "date_start": "2020-01-01"} - expected = {"clicks": "0", "date": "2020-01-01"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected + row1.set_data({"date_start": "2020-01-01", "impressions": "1"}) + row2.set_data({"date_start": "2020-01-01", "impressions": "2"}) + mock_query_ad_insights.return_value = [row1, row2] + + data = next(FacebookReader(**temp_kwargs).read()) + expected = [ + {"date_start": "2020-01-01", "impressions": "1"}, + {"date_start": "2020-01-01", "impressions": "2"}, + ] + + for record, report in zip(data.readlines(), iter(expected)): + self.assertEqual(record, report) + + @mock.patch("nck.readers.facebook_reader.FacebookReader.query_object_node") + @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) + @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) + def test_read_with_object_node_query(self, mock_query_object_node): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"ad_insights": False, "field": ["id", "status"]}) + + row1, row2 = Ad(), Ad() + row1.set_data({"id": "123456789", "status": "ACTIVE"}) + row2.set_data({"id": "987654321", "status": "PAUSED"}) + mock_query_object_node.return_value = [row1, row2] + + data = next(FacebookReader(**temp_kwargs).read()) + expected = [ + {"id": "123456789", "status": "ACTIVE"}, + {"id": "987654321", "status": "PAUSED"}, + ] + + for record, report in zip(data.readlines(), iter(expected)): + self.assertEqual(record, report) + + @parameterized.expand( + [ + ( + "simple_field", + {"field": ["impressions"], "action_breakdown": []}, + {"impressions": "10314"}, + {"impressions": "10314"}, + ), + ( + 
"nested_field", + {"field": ["creative[id]"], "action_breakdown": []}, + {"creative": {"id": "123456789"}}, + {"creative[id]": "123456789"}, + ), + ( + "action_breakdown_field_without_filters", + { + "field": ["actions"], + "action_breakdown": ["action_type", "action_device"], + }, + { + "actions": [ + {"action_type": "link_click", "value": "0"}, + {"action_type": "post_engagement", "value": "1"}, + ] + }, + { + "actions[action_type:link_click]": "0", + "actions[action_type:post_engagement]": "1", + }, + ), + ( + "action_breakdown_field_without_filters", + { + "field": ["actions[action_type:link_click][action_device:iphone]"], + "action_breakdown": ["action_type", "action_device"], + }, + { + "actions": [ + { + "action_type": "link_click", + "action_device": "iphone", + "value": "0", + }, + { + "action_type": "post_engagement", + "action_device": "iphone", + "value": "1", + }, + { + "action_type": "link_click", + "action_device": "desktop", + "value": "2", + }, + { + "action_type": "post_engagement", + "action_device": "desktop", + "value": "3", + }, + ] + }, + {"actions[action_type:link_click][action_device:iphone]": "0"}, + ), + ( + "field_not_in_record", + {"field": ["impressions", "clicks"], "action_breakdown": []}, + {"impressions": "1"}, + {"impressions": "1"}, + ), + ] + ) + def test_format_and_yield(self, name, parameters, record, expected): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update(parameters) + self.assertEqual( + next(FacebookReader(**temp_kwargs).format_and_yield(record)), expected + )