diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..c02bed5f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +**/.github +**/tests +**/.env +**/.flake8 +**/.gitignore +**/.git +CONTRIBUTING.md +README.md +**/__pycache__ +**/*.pyc +**/.settings +**/.vscode +**/Dockerfile* +**/requirements-dev.txt \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..19a3fd3d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/ambv/black + rev: stable + hooks: + - id: black + entry: black + language_version: python3.7 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v1.2.3 + hooks: + - id: flake8 diff --git a/README.md b/README.md index e61b141c..a2398ba6 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Nautilus connectors kit is a tool which aim is getting raw data from different s - MySQL - Radarly - SalesForce +- The Trade Desk - Twitter Ads - Yandex Campaign - Yandex Statistics diff --git a/documentation/images/credentials_gs.png b/documentation/images/credentials_gs.png new file mode 100644 index 00000000..34373a1e Binary files /dev/null and b/documentation/images/credentials_gs.png differ diff --git a/nck/entrypoint.py b/nck/entrypoint.py index 865ebdd9..cb3f6a5a 100644 --- a/nck/entrypoint.py +++ b/nck/entrypoint.py @@ -59,7 +59,6 @@ def run(processors, state_service_name, state_service_host, state_service_port, raise click.BadParameter("You must specify at least one writer") reader = _readers[0] - # A stream should represent a full file! for stream in reader.read(): for writer in _writers: diff --git a/nck/helpers/dbm_helper.py b/nck/helpers/dbm_helper.py index 6042a8e1..8d321cd2 100644 --- a/nck/helpers/dbm_helper.py +++ b/nck/helpers/dbm_helper.py @@ -21,23 +21,5 @@ "existing_query_report", "custom_query_report", "lineitems_objects", - "sdf_objects", "list_reports", ] - -POSSIBLE_SDF_FILE_TYPES = [ - "INVENTORY_SOURCE", - "AD", - "AD_GROUP", - "CAMPAIGN", - "INSERTION_ORDER", - "LINE_ITEM", -] - -FILE_TYPES_DICT = { - "AD": "ads", - "AD_GROUP": "adGroups", - "CAMPAIGN": "campaigns", - "LINE_ITEM": "lineItems", - "INSERTION_ORDER": "insertionOrders", -} diff --git a/nck/helpers/dv360_helper.py b/nck/helpers/dv360_helper.py new file mode 100644 index 00000000..14b0d608 --- /dev/null +++ b/nck/helpers/dv360_helper.py @@ -0,0 +1,20 @@ +FILE_NAMES = { + "FILE_TYPE_INSERTION_ORDER": "InsertionOrders", + "FILE_TYPE_CAMPAIGN": "Campaigns", + "FILE_TYPE_MEDIA_PRODUCT": "MediaProducts", + "FILE_TYPE_LINE_ITEM": "LineItems", + "FILE_TYPE_AD_GROUP": "AdGroups", + "FILE_TYPE_AD": "AdGroupAds" +} + +FILE_TYPES = FILE_NAMES.keys() + +FILTER_TYPES = [ + "FILTER_TYPE_UNSPECIFIED", + "FILTER_TYPE_NONE", + "FILTER_TYPE_ADVERTISER_ID", + "FILTER_TYPE_CAMPAIGN_ID", + "FILTER_TYPE_MEDIA_PRODUCT_ID", + "FILTER_TYPE_INSERTION_ORDER_ID", + "FILTER_TYPE_LINE_ITEM_ID" +] diff --git a/nck/helpers/ttd_helper.py b/nck/helpers/ttd_helper.py new file mode 100644 index 00000000..0a0dbea2 --- /dev/null +++ b/nck/helpers/ttd_helper.py @@ -0,0 +1,64 @@ +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import logging +from datetime import datetime + +API_HOST = "https://api.thetradedesk.com/v3" + +API_ENDPOINTS = { + "get_report_template_id": ("POST", "myreports/reporttemplateheader/query"), + "create_report_schedule": ("POST", "myreports/reportschedule"), + "get_report_execution_details": ( + "POST", + "myreports/reportexecution/query/advertisers", + ), + "delete_report_schedule": ("DELETE", "/myreports/reportschedule"), +} + +DEFAULT_REPORT_SCHEDULE_ARGS = { + "ReportFileFormat": "CSV", + "ReportDateRange": "Custom", + "TimeZone": "UTC", + "ReportDateFormat": "Sortable", + "ReportNumericFormat": "US", + "IncludeHeaders": True, + "ReportFrequency": "Once", +} + +DEFAULT_PAGING_ARGS = { + "PageStartIndex": 0, + "PageSize": 10, +} + +API_DATEFORMAT = "%Y-%m-%dT%H:%M:%S" +BQ_DATEFORMAT = "%Y-%m-%d" + + +class ReportTemplateNotFoundError(Exception): + def __init__(self, message): + super().__init__(message) + logging.error(message) + + +class ReportScheduleNotReadyError(Exception): + def __init__(self, message): + super().__init__(message) + logging.error(message) + + +def format_date(date_string): + """ + Input: "2020-01-01T00:00:00" + Output: "2020-01-01" + """ + return datetime.strptime(date_string, API_DATEFORMAT).strftime(BQ_DATEFORMAT) diff --git a/nck/helpers/twitter_helper.py b/nck/helpers/twitter_helper.py new file mode 100644 index 00000000..042752a6 --- /dev/null +++ b/nck/helpers/twitter_helper.py @@ -0,0 +1,72 @@ +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +from twitter_ads.campaign import FundingInstrument, Campaign, LineItem +from twitter_ads.creative import MediaCreative, PromotedTweet, CardsFetch + + +REPORT_TYPES = ["ANALYTICS", "REACH", "ENTITY"] + +ENTITY_OBJECTS = { + "FUNDING_INSTRUMENT": FundingInstrument, + "CAMPAIGN": Campaign, + "LINE_ITEM": LineItem, + "MEDIA_CREATIVE": MediaCreative, + "PROMOTED_TWEET": PromotedTweet, +} + +ENTITY_ATTRIBUTES = { + **{ + entity: list(ENTITY_OBJECTS[entity].__dict__["PROPERTIES"].keys()) + for entity in ENTITY_OBJECTS + }, + "CARD": list(CardsFetch.__dict__["PROPERTIES"].keys()), +} + +GRANULARITIES = ["DAY", "TOTAL"] + +METRIC_GROUPS = [ + "ENGAGEMENT", + "BILLING", + "VIDEO", + "MEDIA", + "MOBILE_CONVERSION", + "WEB_CONVERSION", + "LIFE_TIME_VALUE_MOBILE_CONVERSION", +] + +PLACEMENTS = [ + "ALL_ON_TWITTER", + "PUBLISHER_NETWORK", +] + +SEGMENTATION_TYPES = [ + "AGE", + "APP_STORE_CATEGORY", + "AUDIENCES", + "CONVERSATIONS", + "CONVERSION_TAGS", + "DEVICES", + "EVENTS", + "GENDER", + "INTERESTS", + "KEYWORDS", + "LANGUAGES", + "LOCATIONS", + "METROS", + "PLATFORMS", + "PLATFORM_VERSIONS", + "POSTAL_CODES", + "REGIONS", + "SIMILAR_TO_FOLLOWERS_OF_USER", + "TV_SHOWS", +] diff --git a/nck/readers/README.md b/nck/readers/README.md index a5ebed1b..535b6198 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -2,6 +2,29 @@ Each reader role is to read data from external source and transform it into a Stream understable format to be written on GCS and BQ thanks to the corresponding writers. +## List of Readers + +- Adobe Analytics 1.4 +- Adobe Analytics 2.0 +- Amazon S3 +- Facebook Marketing +- Google Ads +- Google Analytics +- Google Cloud Storage +- Google Campaign Manager +- Google Display & Video 360 +- Google Search Ads 360 +- Google Search Console +- Google Sheets +- Oracle +- MySQL +- Radarly +- SalesForce +- The Trade Desk +- Twitter Ads +- Yandex Campaign +- Yandex Statistics + ## Step to create a new Reader 1. Create python module following naming nomenclature ``` [command]_reader.py ``` @@ -10,11 +33,109 @@ Each reader role is to read data from external source and transform it into a St 4. Reference click command into [commands list](./__init__.py) 5. Update current README.md -## Facebook Reader +## Adobe Analytics Readers + +As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. + +#### How to obtain credentials + +Both Adobe Analytics Readers use the **JWT authentication framework**. +- Get developer access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) +- Create a Service Account integration to Adobe Analytics on [Adobe Developer Console](https://console.adobe.io/) +- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)). All these parameters will be passed to Adobe Analytics Readers. 
+ +### Adobe Analytics Reader 1.4 + +#### Source API + +[Analytics API v1.4](https://github.com/AdobeDocs/analytics-1.4-apis) #### Quickstart -The Facebook Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Ad Management** (to retrieve configuration data). +Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console +``` + +Didn't work? See [troubleshooting](#troubleshooting) section. + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-client-id`|Client ID, that you can find on Adobe Developer Console| +|`--adobe-client-secret`|Client Secret, that you can find on Adobe Developer Console| +|`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| +|`--adobe-org-id`|Organization ID, that you can find on Adobe Developer Console| +|`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| +|`--adobe-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| +|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to *True*, the below parameters should be left empty.| +|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| +|`--adobe-report-metric-id`|ID of the metric to include in the report| +|`--adobe-date-granularity`|Granularity of the report. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS*| +|`--adobe-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--adobe-end-date`|End date of the period to request (format: YYYY-MM-DD)| + +#### Addtional information + +- **The full list of available elements and metrics** can be retrieved with the [GetElements](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetElements.md) and [GetMetrics](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetMetrics.md) methods. +- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. +- **If you need further information**, the documentation of Adobe APIs 1.4 can be found [here](https://github.com/AdobeDocs/analytics-1.4-apis). 
+ +### Adobe Analytics Reader 2.0 + +#### Source API + +[Analytics API v2.0](https://github.com/AdobeDocs/analytics-2.0-apis) + +#### Quickstart + +Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe_2_0 --adobe-2-0-client-id --adobe-2-0-client-secret --adobe-2-0-tech-account-id --adobe-2-0-org-id --adobe-2-0-private-key --adobe-2-0-global-company-id --adobe-2-0-report-suite-id --adobe-2-0-dimension daterangeday --adobe-2-0-dimension campaign --adobe-2-0-start-date 2020-01-01 --adobe-2-0-end-date 2020-01-31 --adobe-2-0-metric visits write_console +``` + +Didn't work? See [troubleshooting](#troubleshooting) section. + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-2-0-client-id`|Client ID, that you can find on Adobe Developer Console| +|`--adobe-2-0-client-secret`|Client Secret, that you can find on Adobe Developer Console| +|`--adobe-2-0-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| +|`--adobe-2-0-org-id`|Organization ID, that you can find on Adobe Developer Console| +|`--adobe-2-0-private-key`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| +|`--adobe-2-0-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| +|`--adobe-2-0-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-2-0-dimension`|Dimension to include in the report| +|`--adobe-2-0-metric`|Metric to include in the report| +|`--adobe-2-0-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--adobe-2-0-end-date`|Start date of the period to request (format: YYYY-MM-DD)| + +#### Additional information + +- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. +- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-dimension daterangeday` will produce a report with a day granularity. +- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. +- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). 
+ +## Amazon S3 Reader + +*Not documented yet.* + +## Facebook Marketing Reader + +#### Source API + +[Facebook Marketing API](https://developers.facebook.com/docs/marketing-api/reference/v7.0) + +#### Quickstart + +The Facebook Marketing Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Ad Management** (to retrieve configuration data). *Example of Ad Insights Request* ``` @@ -26,6 +147,8 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-ad-insights False --facebook-level ad --facebook-field id --facebook-field creative[id] --facebook-add-date-to-report True --facebook-start-date 2020-01-01 --facebook-end-date 2019-01-01 write_console ``` +Didn't work? See [troubleshooting](#troubleshooting) section. + #### Parameters |CLI option|Documentation| @@ -33,22 +156,22 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-access-token`|Facebook App Access Token.| -|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Supported values: creative (available only for Ad Management requests), ad, adset, campaign, account (default).*| +|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Possible values: creative (available only for Ad Management requests), ad, adset, campaign, account (default).*| |`--facebook-object-id`|ID of the root Facebook Object used to make the request.| -|`--facebook-level`|Granularity of the response. *Supported values: creative (available only for Ad Management requests), ad (default), adset, campaign or account.*| +|`--facebook-level`|Granularity of the response. *Possible values: creative (available only for Ad Management requests), ad (default), adset, campaign, account.*| |`--facebook-ad-insights`|*True* (default) if *Ad Insights* request, *False* if *Ad Management* request.| |`--facebook-field`|Fields to be retrieved.| -|`--facebook-start-date`|Start date of the requested time range. *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| -|`--facebook-end-date`|End date of the requested time range. *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| +|`--facebook-start-date`|Start date of the period to request (format: YYYY-MM-DD). *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| +|`--facebook-end-date`|Start date of the period to request (format: YYYY-MM-DD). *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| |`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start date* and *--facebook-end-date* are specified. *This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| |`--facebook-time-increment`|Cuts the results between smaller time slices within the specified time range. 
*This parameter is only relevant for Ad Insights Requests, and Ad Management requests at the Campaign, Adset and Ad levels.*| |`--facebook-add-date-to-report`|*True* if you wish to add the date of the request to each response record, *False* otherwise (default).| |`--facebook-breakdown`|How to break down the result. *This parameter is only relevant for Ad Insights Requests.*| |`--facebook-action-breakdown`|How to break down action results. *This parameter is only relevant for Ad Insights Requests.*| -#### Additional details for a relevant use of the Facebook Reader +#### Additional information -**#1: Make sure to select the appropriate `--facebook-level`** +**1. Make sure to select the appropriate `--facebook-level`** |If Facebook Object Type is...|Facebook Level can be...| |:--|:--| @@ -58,7 +181,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`ad`|ad, creative| |`creative`|creative| -**#2: Format Facebook Reader response using `--facebook-fields`** +**2. Format Facebook Marketing Reader response using `--facebook-fields`** 2.1. The list of **applicable fields** can be found on the links below: @@ -67,7 +190,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- 2.2. If you want to select **a nested field value**, simply indicate the path to this value within the request field. -*Facebook Reader Request* +*Facebook Marketing Reader Request* ``` --facebook-field object_story_spec[video_data][call_to_action][value][link] ``` @@ -87,14 +210,14 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- } ``` -*Facebook Reader Response* +*Facebook Marketing Reader Response* ``` {"object_story_spec_video_data_call_to_action_value_link": "https://www.artefact.com"} ``` -(2.3) **Action Breakdown filters** can be applied to the fields of ***Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. +2.3 **Action Breakdown filters** can be applied to the fields of ***Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. -*Facebook Reader Request* +*Facebook Marketing Reader Request* ``` --facebook-action-breakdown action_type --facebook-field actions[action_type:video_view][action_type:post_engagement] @@ -120,9 +243,9 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- "value": "12" } ] -``` -*Facebook Reader Response* +``` +*Facebook Marketing Reader Response* ``` {"actions_action_type_video_view": "17", "actions_action_type_post_engagement": "25"} ``` @@ -131,317 +254,489 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- ### Authentication -You can authenticate to most of the readers of the google -suite following the same schema. You'll need to generate a **refresh token** to connect -via the oAuth flow. A full script to do this can be found here: - -[Refresh token generator](https://github.com/artefactory/Refresh-token-generator-for-google-oauth) - +You can authenticate to most of the Readers of the Google Suite following the same schema. You'll need to generate a **refresh token** to connect via the OAuth flow. A full script to do this can be found in this [refresh token generator](https://github.com/artefactory/Refresh-token-generator-for-google-oauth). 
### Google Ads Reader -#### How to obtain Credentials +#### Source API +[AdWords API](https://developers.google.com/adwords/api/docs/guides/start) -Using the Google Ads API requires four things: -- A developer token (Generated at a company level - one per company -, takes around 2 days to be approved by Google) which can be completely independant from the Google Ads Account you will be calling (though you need a Manager Google Ads Account to request a token for your company) +#### How to obtain credentials +Using the AdWords API requires four things: +- A developer token (Generated at a company level - one per company -, takes around 2 days to be approved by Google) which can be completely independant from the Google Ads Account you will be calling (though you need a Manager Google Ads Account to request a token for your company) - OAuth2 credentials: and - - A refresh token, created with the email address able to access to all the Google Ads Account you will be calling +- The ID of the Google Ads Accounts you will be reading from (XXX-XXX-XXXX numbers, written right next to your Account Name) -- The ID of the GAds Accounts you will be reading from (XXX-XXX-XXXX numbers, written right next to your Account Name) +See the [documentation here](https://developers.google.com/adwords/api/docs/guides/signup) to apply for access if your Company does not already have a developer token (granting you the right to use the API). -See the [documentation here](https://developers.google.com/adwords/api/docs/guides/signup "Sign Up for Google Ads API") -to apply for access if your Company does not already have a developer token (granting you the right to use the API). +See the [documentation here](https://developers.google.com/adwords/api/docs/guides/first-api-call) to set-up your OAuth2 credentials and refresh token specifically for your Google Ads Accounts. -See the [documentation here](https://developers.google.com/adwords/api/docs/guides/first-api-call "Make your first API call") -to set-up your OAuth2 credentials and refresh token specifically for your Google Ads Accounts. +#### Quickstart +The following command retrieves insights about the Ads of *my_first_campaign* and *my_second_campaign* in the Google Ads Account , thanks to your company , , and with the necessary permissions to access your Accounts. -#### Which Reports and Metrics are available in the API +``` +python nck/entrypoint.py read_googleads --googleads-developer-token --googleads-client-id --googleads-client-secret --googleads-refresh-token --googleads-client-customer-id --googleads-report-type AD_PERFORMANCE_REPORT --googleads-date-range-type LAST_7_DAYS --googleads-field CampaignName --googleads-field AdGroupName --googleads-field Headline --googleads-field Date --googleads-field Impressions --googleads-report-filter "{'field':'CampaignName','operator':'IN','values':['my_first_campaign','my_second_campaign']}" +``` -The list of available reports for the API, and the associated metrics, can be [found here](https://developers.google.com/adwords/api/docs/appendix/reports#available-reports "Report Types") +Didn't work? See [troubleshooting](#troubleshooting) section. 
-#### Simple API call example +#### Parameters -- Call Example +|CLI option|Documentation| +|--|--| +|`--googleads-developer-token`|Company Developer token for Google Ads API| +|`--googleads-client-id`|OAuth2 ID| +|`--googleads-client-secret`|OAuth2 secret| +|`--googleads-refresh-token`|Refresh token for OAuth2| +|`--googleads-manager-id`|(Optional) Manager_Account_ID (XXX-XXX-XXXX identifier)| +|`--googleads-client-customer-id`|GAds_Account_ID (ignored if a manager account ID was given)| +|`--googleads-report-name`|(Optional) Name of your output stream ("Custom Report" by default)| +|`--googleads-report-type`|Type of report to be called| +|`--googleads-date-range-type`|Type of date range to apply (if "CUSTOM_RANGE", a min and max date must be specified). *Possible values can be found [here](https://developers.google.com/adwords/api/docs/guides/reporting#date_ranges).*| +|`--googleads-start-date`|(Optional) Start date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD)| +|`--googleads-end-date`|(Optional) End date for "CUSTOM_RANGE" date range (format: YYYY-MM-DD)| +|`--googleads-field`|Fields to include in the report| +|`--googleads-report-filter`|Filter to apply on a chosen field (Dictionary as String "{'field':,'operator':,'values':}")| +|`--googleads-include-zero-impressions`|Boolean specifying whether or not rows with zero impressions should be included in the report| +|`--googleads-filter-on-video-campaigns`|Boolean used to filter the report on Video Campaigns only (require CampaignId to be listed as a field)| +|`--googleads-include-client-customer-id`|Boolean used to add "AccountId" as a field in the output stream. *AccountId is not available in the API, but is known since it's a requirement to call the API (= Client Customer ID)*| + +See documentation below for a better understanding of the parameters: +- [Reporting basics](https://developers.google.com/adwords/api/docs/guides/reporting#create_a_report_definition) +- [Available reports and associated fields](https://developers.google.com/adwords/api/docs/appendix/reports#available-reports) + +### Google Analytics Reader + +#### Source API + +[Analytics Reporting API](https://developers.google.com/analytics/devguides/reporting/core/v4) +#### Quickstart -The following command retrieves insights about the Ads of *my_first_campaign* and *my_second_campaign* in the Google Ads Account thanks to -your company , and your , and with the necessary permissions to access your Accounts. +The following command retrieves sessions, pageviews and bounces volumes by date from 2020-01-01 to 2020-01-03, for the Analytics View , thanks your , and with the necessary permissions to access your accounts. 
``` -python nck/entrypoint.py read_googleads --googleads-developer-token --googleads-client-id --googleads-client-secret --googleads-refresh-token --googleads-client-customer-id --googleads-report-type AD_PERFORMANCE_REPORT --googleads-date-range-type LAST_7_DAYS --googleads-field CampaignName --googleads-field AdGroupName --googleads-field Headline --googleads-field Date --googleads-field Impressions --googleads-report-filter "{'field':'CampaignName','operator':'IN','values':['my_first_campaign','my_second_campaign']}" +python nck/entrypoint.py read_ga --ga-client-id --ga-client-secret --ga-view-id --ga-refresh-token --ga-dimension ga:date --ga-metric sessions --ga-metric ga:pageviews --ga-metric ga:bounces --ga-start-date 2020-01-01 --ga-end-date 2020-01-03 write_console ``` -*If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* -*If you want the output to be printed in your console, add* `write_console` *at the end of your command (see writers for more details)* +Didn't work? See [troubleshooting](#troubleshooting) section. -- Parameters of the GoogleAds Readers +#### Parameters +|CLI option|Documentation| +|--|--| +|`--ga-client-id`|OAuth2 ID| +|`--ga-client-secret`|OAuth2 secret| +|`--ga-access-token`|(Optional) Access token for OAuth2| +|`--ga-refresh-token`|Refresh token for OAuth2| +|`--ga-view-id`|Analytics View ID from which to retrieve data. See documentation [here](https://support.google.com/analytics/answer/1009618) for a better understanding of Google Analytics hierrarchy.| +|`--ga-account-id`|Analytics Account ID from which to retrieve data. See documentation [here](https://support.google.com/analytics/answer/1009618) for a better understanding of Google Analytics hierrarchy.| +|`--ga-dimension`|Dimensions to include in the report (max 9). Possible values can be found [here](https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/).| +|`--ga-metric`|Metrics to include in the report (min 1, max 10). Possible values can be found [here](https://ga-dev-tools.appspot.com/dimensions-metrics-explorer/).| +|`--ga-segment-id`|Segment ID of a built-in or custom segment (for example gaid::-3) on which report data should be segmented.| +|`--ga-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--ga-end-date`|End date of the period to request (format: YYYY-MM-DD)| +|`--ga-date-range`| of the period to request, specified as a unique argument (format: YYYY-MM-DD YYYY-MM-DD)| +|`--ga-day-range`|Relative time range. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS.*| +|`--ga-sampling-level`|Desired sample size. See documentation [here](https://support.google.com/analytics/answer/2637192) for a better understanding of Google Analytics sampling. 
*Possible values: SMALL, DEFAULT, LARGE (default).*| +|`--ga-add-view`|If set to *True* (default: False)*, adds a "ga:viewId" field to the output stream.| -| --googleads-developer-token | --googleads-client-id | --googleads-client-secret | --googleads-refresh-token | --googleads-manager-id | --googleads-client-customer-id | --googleads-report-name | --googleads-report-type | --googleads-date-range-type | --googleads-start-date | --googleads-end-date | --googleads-field | --googleads-report-filter | --googleads-include-zero-impressions | --googleads-filter-on-video-campaigns | --googleads-include-client-customer-id | -|:-----------------:|:---------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:--------------------:|:---------------------------:|:----------------------:|:-------------------:|:-------------------------:|:----------------:|:------------------------:|:------------------------:|:------------------------:|:------------------------:| -|Company Developer token for Google Ads API |OAuth2 ID| OAuth2 Secret|Refresh token for OAuth2|Manager_Account_ID (XXX-XXX-XXXX identifier) (optional)|GAds_Account_ID (ignored if a manager account ID was given)|Optional Name for your output stream ("Custom Report" by default)|Type of Report to be called|Type of Date Range to apply (if "CUSTOM_RANGE", a min and max date must be specified) |Start Date for "CUSTOM_RANGE" date range (optional)|End Date for "CUSTOM_RANGE" date range (optional)|List of fields to request |Filter to apply on a chosen field (Dictionary as String "{'field':,'operator':,'values':}")|Boolean specifying whether or not rows with zero impressions should be included in report| Boolean used to filter on Video Campaigns only (require CampaignId to be listed as a field) | Boolean used to add "AccountId" as a field in the output stream * | +See documentation [here](https://developers.google.com/analytics/devguides/reporting/core/v4/basics) for a better understanding of the parameters. -\* *AccountId is not available in the API but is known since it's a requirement to call the API (= client customer ID)* +### Google Cloud Storage Reader -See the documents below for a better understanding of the parameters: -- [Google Ads API Reporting Basics](https://developers.google.com/adwords/api/docs/guides/reporting#create_a_report_definition) -- [Possible Date Ranges](https://developers.google.com/adwords/api/docs/guides/reporting#date_ranges) +*Not documented yet.* +### Google Campaign Manager Reader -### Google Search Console Reader +#### Source API -#### How to obtain Credentials +[DCM/DFA Reporting and Trafficking API](https://developers.google.com/doubleclick-advertisers/v3.3) -Using the Google Search Console API requires three main parameters: -- OAuth2 credentials: and +#### Quickstart -- A refresh token, created with the email address able to access to your Google Search Console Account. +The following command retrieves impressions, clicks and cost volumes from 2020-01-01 to 2020-01-03, thanks your , , and with the necessary permissions to access your accounts. + +``` +python nck/entrypoint.py read_dcm --dcm-client-id --dcm-client-secret --dcm-refresh-token --dcm-profile-id --dcm-dimension dfa:date --dcm-metric dfa:impressions --dcm-metric dfa:clicks --dcm-metric dfa:mediaCost --dcm-start-date 2020-01-01 --dcm-end-date 2020-01-03 write_console +``` -- The URLs whose performance you want to see. +Didn't work? See [troubleshooting](#troubleshooting) section. 
-See the [documentation here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/prereqs "Search Console API") -to see an Overview of the Search Console API. +##### Parameters +|CLI option|Documentation| +|--|--| +|`--dcm-client-id`|OAuth2 ID| +|`--dcm-client-secret`|OAuth2 secret| +|`--dcm-access-token`|(Optional) Access token for OAuth2| +|`--dcm-refresh-token`|Refresh token for OAuth2| +|`--dcm-profile-id`|ID of the DFA user profile that has been granted permissions to the CM account for which you want to retrieve data. You should have 1 DFA user profile per CM account that you can access. The associated ID can be found directly on your Campaign Manager UI (when accessing your list of CM accounts, on the top right hand corner).| +|`--dcm-report-name`|Name of the report, that will appear in CM UI.| +|`--dcm-report-type`|Type of the report. *Possible values: CROSS_DIMENSION_REACH, FLOODLIGHT, PATH_TO_CONVERSION, REACH, STANDARD.*| +|`--dcm-dimension`|Dimensions to include in the report. *Possible values can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| +|`--dcm-metric`|Metrics to include in the report. *Possible values can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| +|`--dcm-filter`| association, used to narrow the scope of the report. For instance "dfa:advertiserId XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. *Possible filter types can be found [here](https://developers.google.com/doubleclick-advertisers/v3.3/dimensions).*| +|`--dcm-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--dcm-end-date`|End date of the period to request (format: YYYY-MM-DD)| -#### Search Analytics +### Google DoubleClick Manager Reader (DBM) -The list of available dimensions and metrics in the API can be [found here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query "Search Analytics") +#### Source API -#### Simple API call example +[Doubleclick Bid Manager API](https://developers.google.com/bid-manager/v1) -- Call Example +#### Quickstart -The following command retrieves insights about the URL thanks to your company and -with the necessary permissions to access your Accounts. +The following command retrieves impressions, clicks and cost volumes filtered on a specific from 2020-01-01 to 2020-01-03, thanks your , and with the necessary permissions to access your accounts. ``` -python nck/entrypoint.py read_search_console --search-console-client-id --search-console-refresh-token --search-console-site-url --search-console-dimensions country --search-console-dimensions device --search-console-start-date 2020-01-01 --search-console-end-date 2020-01-01 write_console +python nck/entrypoint.py read_dbm --dbm-client-id --dbm-client-secret —dbm-refresh-token —dbm-filter FILTER_ADVERTISER --dbm-query-dimension FILTER_DATE --dbm-query-metric METRIC_IMPRESSIONS --dbm-query-metric METRIC_CLICKS --dbm-query-metric METRIC_MEDIA_COST_ADVERTISER --dbm-query-param-type TYPE_GENERAL --dbm-request-type custom_query_report --dbm-start-date 2020-01-01 --dbm-end-date 2020-01-03 write_console ``` -- Parameters of the Google Search Console Readers +Didn't work? See [troubleshooting](#troubleshooting) section. 
-| --search-console-client-id | --search-console-client-secret | --search-console-access-token | --search-console-refresh-token | --search-console-dimensions | --search-console-site-url | --search-console-start-date | --search-console-end-date | --search-console-date-column | --search-console-row-limit | -|:-----------------:|:---------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:--------------------:|:---------------------------:|:----------------------:|:----------------------:| -|OAuth2 ID| OAuth2 Secret| Access token | Refresh token for OAuth2 | [Dimensions to request](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query#dimensionFilterGroups.filters.dimension) |Site URL whose performance you want to request| Start Date for the request | End Date for the request | If true, include date column in the report | Row number by report page | +#### Parameters -See the documents below for a better understanding of the parameters: -- [Google Search Console API](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query) +|CLI option|Documentation| +|--|--| +|`--dbm-client-id`|OAuth2 ID| +|`--dbm-client-secret`|OAuth2 secret| +|`--dbm-access-token`|(Optional) Access token for OAuth2| +|`--dbm-refresh-token`|Refresh token for OAuth2| +|`--dbm-query-request-type`|Doubleclick Bid Manager API request type. *Possible values: existing_query, custom_query, existing_query_report, custom_query_report, lineitems_objects, sdf_objects and list_reports.*| +|`--dbm-query-id`|Query ID.| +|`--dbm-query-title`|Query title, used to name the reports generated from this query in DV360 UI.| +|`--dbm-query-frequency`|How often the query is run. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/queries#schedule.frequency). Default: ONE_TIME.*| +|`--dbm-filter`| association, used to narrow the scope of the report. For instance "FILTER_ADVERTISER XXXXX" will narrow report scope to the performance of Advertiser ID XXXXX. *Possible filter types can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#filters).*| +|`--dbm-query-dimension`|Dimensions to include in the report. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#filters).*| +|`--dbm-query-metric`|Metrics to include in the report. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/filters-metrics#metrics).*| +|`--dbm-query-param-type`|Report type. *Possible values can be found [here](https://developers.google.com/bid-manager/v1/queries#params.type). Default: TYPE_TRUEVIEW.*| +|`--dbm-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--dbm-end-date`|End date of the period to request (format: YYYY-MM-DD)| +### Google Search Console Reader -### Search Ads 360 Reader (SA360) +#### Source API -#### How to obtain Credentials +[Search Console API (Search Analytics endpoint)](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/) -Using the Search Ads API requires two things: +#### How to obtain credentials +Using the Google Search Console API requires three main parameters: - OAuth2 credentials: and +- A refresh token, created with the email address able to access to your Google Search Console Account. 
+- The URLs whose performance you want to see -- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling +#### Quickstart -See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 Authentication") -to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. +The following command retrieves insights about the URL from 2020-01-01 to 2020-01-03, thanks to your and with the necessary permissions to access your accounts. +``` +python nck/entrypoint.py read_search_console --search-console-client-id --search-console-refresh-token --search-console-site-url --search-console-dimensions country --search-console-dimensions device --search-console-start-date 2020-01-01 --search-console-end-date 2020-01-03 write_console +``` -#### Which Reports and Metrics are available in the API +Didn't work? See [troubleshooting](#troubleshooting) section. -The list of available reports for the API, and the associated metrics, can be [found here](https://developers.google.com/search-ads/v2/report-types "Report Types") +#### Parameters -#### Simple API call example +|CLI option|Documentation| +|--|--| +|`--search-console-client-id`|OAuth2 ID| +|`--search-console-client-secret`|OAuth2 secret| +|`--search-console-access-token`|Access token for OAuth2| +|`--search-console-refresh-token`|Refresh token for OAuth2| +|`--search-console-dimensions`|Dimensions of the report. *Possible values can be found [here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query#dimensionFilterGroups.filters.dimension).*| +|`--search-console-site-url`|Site URL whose performance you want to request| +|`--search-console-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--search-console-end-date`|End date of the period to request (format: YYYY-MM-DD)| +|`--search-console-date-column`|If set to *True*, a date column will be included in the report| +|`--search-console-row-limit`|Row number by report page| + +See documentation [here](https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics/query) for a better understanding of the parameters. -- Call Example +### Google Search Ads 360 Reader +#### Source API -The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency thanks to -your , and with the necessary permissions to access your Accounts. +[Search Ads 360 API](https://developers.google.com/search-ads/v2/reference) + +#### How to obtain credentials + +Using the Search Ads API requires two things: +- OAuth2 credentials: and +- A refresh token, created with the email address able to access to all the Search Ads 360 Account you will be calling + +See the [documentation here](https://developers.google.com/search-ads/v2/authorizing "SA360 Authentication") +to set-up your OAuth2 credentials and refresh token specifically for Search Ads 360 Reporting. + +#### Quickstart + +The following command retrieves insights about the Ads in the Search Ads 360 Account from the agency thanks to your , and with the necessary permissions to access your accounts. 
``` python nck/entrypoint.py read_sa360 --sa360-client-id --sa360-client-secret --sa360-refresh-token --sa360-agency-id --sa360-advertiser-id --sa360-report-type keyword --sa360-column date --sa360-column impr --sa360-column clicks --sa360-start-date 2020-01-01 --sa360-end-date 2020-01-01 ``` -*If it doesn't work, try to* `export PYTHONPATH="."` *in the nautilus-connector-kit folder (to be sure Python is reading correctly)* -*If you want the output to be printed in your console, add* `write_console` *at the end of your command (see writers for more details)* +Didn't work? See [troubleshooting](#troubleshooting) section. +#### Parameters -- Parameters of the SA360 Reader +|CLI option|Documentation| +|--|--| +|`--sa360-client-id`|OAuth2 ID| +|`--sa360-client-secret`|OAuth2 secret| +|`--sa360-access-token`|(Optional) Access token| +|`--sa360-refresh-token`|Refresh token| +|`--sa360-agency-id`|Agency ID to request in SA360| +|`--sa360-advertiser-id`|(Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested| +|`--sa360-report-name`|(Optional) Name of the output report| +|`--sa360-report-type`| Type of the report to request. *Possible values can be found [here](https://developers.google.com/search-ads/v2/report-types).*| +|`--sa360-column`|Dimensions and metrics to include in the report| +|`--sa360-saved-column`|(Optional) Saved columns to report. *Documentation can be found [here](https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns).*| +|`--sa360-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--sa360-end-date`|End date of the period to request (format: YYYY-MM-DD)| -| CLI option | Documentation | -| ---------- | ------------- | -|`--sa360-access-token` | (Optional) Access token | -|`--sa360-client-id` | OAuth2 ID | -|`--sa360-client-secret` | OAuth2 ID Secret | -|`--sa360-refresh-token` | Refresh token | -|`--sa360-agency-id` | Agency ID to request in SA360 | -|`--sa360-advertiser-id` | (Optional) Advertiser ids to request. If not provided, every advertiser of the agency will be requested| -|`--sa360-report-name` | (Optional) Name of the output report | -|`--sa360-report-type` | Type of the report to request. List [here](https://developers.google.com/search-ads/v2/report-types)| -|`--sa360-column` | Dimensions and metrics to request in the report | -|`--sa360-saved-column` | (Optional) Saved columns to report. See [documentation](https://developers.google.com/search-ads/v2/how-tos/reporting/saved-columns)| -|`--sa360-start-date` | Start date of the period to request | -|`--sa360-end-date` | End date of the period to request | +See documentation [here](https://developers.google.com/search-ads/v2/how-tos/reporting) for a better understanding of the parameters. -See the documents below for a better understanding of the parameters: -- [SA360 Reporting](https://developers.google.com/search-ads/v2/how-tos/reporting) +### Google Sheets Reader -## Yandex readers +#### Source API -For now, there is only one Yandex API you can access through Nautilus connectors: [Direct API](https://tech.yandex.com/direct/). -This API allows you to collect display metrics. +[Google Sheets API](https://developers.google.com/sheets/api) -### Access Yandex Direct API +#### Quickstart -In order to access Yandex Direct API, you need two accounts: an advertiser account and a developer account. -Here is the process: +This command allows you to retrieve the desired information from the google sheet row by row in a dict format. 
For example, given 3 columns a, b, c and 2 rows with respectively the values d,e,f and g, h, i, we would obtain such a dict : -1. Create a developer account if you don't already have one. Click on the *Get started* button on this [page](https://direct.yandex.com/). -2. Create and register an app that will access Yandex Direct API via [Yandex OAuth](https://oauth.yandex.com/client/new). -3. Keep app client id safe. Log in with your advertiser account and [give permission to the app to access your data](https://tech.yandex.com/oauth/doc/dg/tasks/get-oauth-token-docpage/). -4. Store your token very carefully. -5. Log out and log in as a developer and [ask permission to access Yandex Direct API](https://direct.yandex.com/registered/main.pl?cmd=apiSettings) (ask for Full access). Fill in the form. -6. Wait for Yandex support to reply but it should be within a week. +``` +{"a": "d", "b": "e", "c": "f"} +{"a": "g", "b": "h", "c": "i"} +``` -### Yandex campaign reader +#### Parameters -[Official documentation](https://tech.yandex.com/direct/doc/ref-v5/campaigns/get-docpage/) +|CLI option|Documentation| +|--| -| +|`--gs-project-id`|Project ID that is given by Google services once you have created your project in the google cloud console. You can retrieve it in the JSON credential file| +|`--gs-private-key-id`|Private key ID given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| +|`--gs-private-key-path`|The path to the private key that is stored in a txt file. You can retrieve it first in the JSON credential file| +|`--gs-client-email`|Client e-mail given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| +|`--gs-client-id`|Client ID given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| +|`--gs-client-cert`|Client certificate given by Google services once you have added credentials to the project. You can retrieve it in the JSON credential file| +|`--gs-file-name`|The name you have given to your google sheet file| +|`--gs-page-number`|The page number you want to access.The number pages starts at 0| -#### Quickstart +#### How to obtain credentials -If you want to quickly get to the point, here is a simple command that get the daily budget for all your campaigns. +To use the nck google_sheets you must first retrieve your credentials. In order to do so head to console.cloud.google.com. In the header, chose your project or create a new one. Next step is to enable some APIs, namely google drive and google sheets api in the API Library. You’ll find it in the « APIs & Services » tab. Now that your google drive API is enabled, click on the « create credentials » button on the upper right corner and enter these informations : -```bash -python nck/entrypoint.py read_yandex_campaigns --yandex-token --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console -``` +![alt text](https://github.com/artefactory/nautilus-connectors-kit/blob/upgrade-gs/documentation_images/credentials_gs.png) -Didn't work? See [troubleshooting](#troubleshooting) section. +Click on "what credentials do I need" and complete the form. +You will find the credentials you need in the JSON file that will start downloading automatically right after. 
-#### Parameters -| CLI option | Documentation | -| ---------- | ------------- | -| `--yandex-token` | Bear token that allows you to authenticate to the API | -| `--yandex-campaign-id` | (Optional) Selects campaigns with the specified IDs. | -| `--yandex-campaign-state` | (Optional) Selects campaigns with the specified [states](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | -| `--yandex-campaign-status` | (Optional) Selects campaigns with the specified [statuses](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | -| `--yandex-campaign-payment-status` | (Optional) Selects campaigns with the specified payment [statuses](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status). | -| `--yandex-field-name` | Parameters to get that are common to all types of campaigns. | +## Oracle Reader -### Yandex statistics reader +*Not documented yet.* -[Official documentation](https://tech.yandex.com/direct/doc/reports/reports-docpage/) +## MySQL Reader + +*Not documented yet.* + +## Radarly Reader + +*Not documented yet.* + +## Salesforce Reader + +*Not documented yet.* + +## The Trade Desk Reader + +#### How to obtain credentials + +- Ask your Account Representative to **give you access to The Trade Desk API and UI** +- He will generally provide you with **two distinct accounts**: an **API account**, allowing you to make API calls (*Login: ttd_api_{XXXXX}@client.com*), and a **UI account**, allowing you to navigate on The Trade Desk UI to create Report Templates (*Login: your professional e-mail address*) +- Pass **the Login and Password of your API account** to The Trade Desk connector #### Quickstart -The command below gives you a performance report for all your campaigns and since the beginning. 
+To request dimensions and metrics to The Trade Desk API, you should first **create a Report Template in The Trade Desk UI**, by following the below process: -```bash -python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_TIME write_console -``` +- Connect to [The Trade Desk UI](https://desk.thetradedesk.com/) using the Login and Password of your UI account +- Navigate to *Reports* > *My Reports* to land on the *Report Templates* section +- Clone an existing Report Template, edit it to keep only the dimensions and metrics that you want to collect, and save it: it will appear under the *Mine* section +- Provide the exact name of the Report Template you have just created under the CLI option `--ttd-report-template-name` of The Trade Desk connector: the connector will "schedule" a report instance (which may take a few minutes to run), and fetch data to the location of your choice +The following command retrieves the data associated to the Report template named "*adgroup_performance_report*" between 2020-01-01 and 2020-01-03, filtered on the PartnerId : +``` +python nck/entrypoint.py read_ttd --ttd-login --ttd-password --ttd-partner-id --ttd-report-template-name adgroup_performance_report --ttd-start-date 2020-01-01 --ttd-end-date 2020-01-03 write_console +``` Didn't work? See [troubleshooting](#troubleshooting) section. #### Parameters -Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/). +|CLI option|Documentation| +|--|--| +|`--ttd-login`|Login of your API account| +|`--ttd-password`|Password of your API account| +|`--ttd-advertiser-id`|Advertiser Ids for which report data should be fetched| +|`--ttd-report-template-name`|Exact name of the Report Template to request. Existing Report Templates can be found within the [MyReports section](https://desk.thetradedesk.com/MyReports) of The Trade Desk UI.| +|`--ttd-report-schedule-name`|Name of the Report Schedule to create| +|`--ttd-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--ttd-end-date`|End date of the period to request (format: YYYY-MM-DD)| +|`--ttd-normalize-stream`|If set to True, yields a NormalizedJSONStream (spaces and special characters replaced by '_' in field names, which is useful for BigQuery). Else (*default*), yields a standard JSONStream.| -| CLI option | Documentation | -| ---------- | ------------- | -| `--yandex-token` | Bear token that allows you to authenticate to the API | -| `--yandex-report-language` | (Optional) Language of the report. See all options [here](https://tech.yandex.com/direct/doc/dg/concepts/headers-docpage/#headers__accept-language). | -| `--yandex-filter` | (Optional) Filters on a particular field. | -| `--yandex-max-rows` | (Optional) The maximum number of rows in the report. | -| `--yandex-field-name` | Information you want to collect. Complete list [here](https://tech.yandex.com/direct/doc/reports/fields-list-docpage/). | -| `--yandex-report-type` | Type of report. Linked to the fields you want to select. 
#### Parameters -Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/). +|CLI option|Documentation| +|--|--| +|`--ttd-login`|Login of your API account| +|`--ttd-password`|Password of your API account| +|`--ttd-advertiser-id`|Advertiser Ids for which report data should be fetched| +|`--ttd-report-template-name`|Exact name of the Report Template to request. Existing Report Templates can be found within the [MyReports section](https://desk.thetradedesk.com/MyReports) of The Trade Desk UI.| +|`--ttd-report-schedule-name`|Name of the Report Schedule to create| +|`--ttd-start-date`|Start date of the period to request (format: YYYY-MM-DD)| +|`--ttd-end-date`|End date of the period to request (format: YYYY-MM-DD)| +|`--ttd-normalize-stream`|If set to True, yields a NormalizedJSONStream (spaces and special characters replaced by '_' in field names, which is useful for BigQuery). Else (*default*), yields a standard JSONStream.| -| CLI option | Documentation | -| ---------- | ------------- | -| `--yandex-token` | Bear token that allows you to authenticate to the API | -| `--yandex-report-language` | (Optional) Language of the report. See all options [here](https://tech.yandex.com/direct/doc/dg/concepts/headers-docpage/#headers__accept-language). | -| `--yandex-filter` | (Optional) Filters on a particular field. | -| `--yandex-max-rows` | (Optional) The maximum number of rows in the report. | -| `--yandex-field-name` | Information you want to collect. Complete list [here](https://tech.yandex.com/direct/doc/reports/fields-list-docpage/). | -| `--yandex-report-type` | Type of report. Linked to the fields you want to select. | -| `--yandex-date-range` | List [here](https://tech.yandex.com/direct/doc/reports/period-docpage/). | -| `--yandex-include-vat` | Adds VAT to your expenses if set to `True`| -| `--yandex-date-start` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`. | -| `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | +If you need any further information, the documentation of The Trade Desk API can be found [here](https://api.thetradedesk.com/v3/portal/api/doc/ApiOverview). -## Adobe Analytics Readers +## Twitter Ads Reader -As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. +#### Source API -#### How to obtain credentials +[Twitter Ads API](https://developer.twitter.com/en/docs/ads/general/overview) -Both Adobe Analytics Readers use the **JWT authentication framework**. -- Get developer access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) -- Create a Service Account integration to Adobe Analytics on [Adobe Developer Console](https://console.adobe.io/) -- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)). All these parameters will be passed to Adobe Analytics Readers. +#### How to obtain credentials -### Adobe Analytics Reader 1.4 +* **Apply for a developer account** through [this link](https://developer.twitter.com/en/apply). +* **Create a Twitter app** on the developer portal: it will generate your authentication credentials. +* **Apply for Twitter Ads API access** by filling out [this form](https://developer.twitter.com/en/docs/ads/general/overview/adsapi-application). Receiving Twitter approval may take up to 7 business days. +* **Get access to the Twitter Ads account** you wish to retrieve data for, on the @handle that you used to create your Twitter App. Be careful, access levels matter: with an *Ad Manager* access, you will be able to request all report types; with a *Campaign Analyst* access, you will be able to request all report types, except ENTITY reports on Card entities. #### Quickstart -Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: +The Twitter Ads Reader can collect **3 types of reports**, making calls to 4 endpoints of the Twitter Ads API: +* **ANALYTICS reports**, making calls to the [Asynchronous Analytics endpoint](https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous). These reports return performance data for a wide range of metrics that **can be aggregated over time**. Output data **can be split by day** when requested over a larger time period. +* **REACH reports**, making calls to the [Reach and Average Frequency endpoint](https://developer.twitter.com/en/docs/ads/analytics/api-reference/reach).
These reports return performance data with a focus on reach and frequency metrics that **cannot be aggregated over time** (*e.g. the reach of day A and B is not equal to the reach of day A + the reach of day B, as it counts unique individuals*). Output data **cannot be split by day** when requested over a larger time period. These reports are available **only for the Funding Instrument and Campaign entities**. +* **ENTITY reports**, making calls to [Campaign Management endpoints](https://developer.twitter.com/en/docs/ads/campaign-management/api-reference) if the selected entity is Funding Instrument, Campaign, Line Item, Media Creative or Promoted Tweet, and to the [Creative endpoint](https://developer.twitter.com/en/docs/ads/creatives/api-reference/) if the selected entity is Card. These reports return details on entity configuration since the creation of the Twitter Ads account. +*Call example for ANALYTICS reports*: this call will collect engagement metrics for Line Item entities, splitting the results by day, from 2020-01-01 to 2020-01-03: ``` -python nck/entrypoint.py read_adobe --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console +python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ANALYTICS --twitter-entity LINE_ITEM --twitter-metric-group ENGAGEMENT --twitter-segmentation-type AGE --twitter-granularity DAY --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console +``` + +*Call example for REACH reports*: this call will collect reach metrics (*total_audience_reach, average_frequency*) for Campaign entities, from 2020-01-01 to 2020-01-03: +``` +python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type REACH --twitter-entity CAMPAIGN --twitter-start-date 2020-01-01 --twitter-end-date 2020-01-03 write_console +``` + +*Call example for ENTITY reports*: this call collects details on the configuration of Campaign entities (id, name, total_budget_amount_local_micro, currency), since the creation of the Twitter Ads account: +``` +python nck/entrypoint.py read_twitter --twitter-consumer-key --twitter-consumer-secret --twitter-access-token --twitter-access-token-secret --twitter-account-id --twitter-report-type ENTITY --twitter-entity CAMPAIGN --twitter-entity-attribute id --twitter-entity-attribute name --twitter-entity-attribute total_budget_amount_local_micro --twitter-entity-attribute currency write_console ``` +Didn't work? See [troubleshooting](#troubleshooting) section.
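+For context, the ANALYTICS path is asynchronous: the reader queues a stats job, waits for it to succeed, then downloads the results. Here is a minimal sketch with the `twitter-ads` SDK (client construction mirrors `nck/readers/twitter_reader.py`; the raw `Request` call and the upper-case placeholders are illustrative assumptions, not the reader's exact code):
+
+```
+from twitter_ads.client import Client
+from twitter_ads.http import Request
+from twitter_ads import API_VERSION
+
+client = Client(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+account = client.accounts(ACCOUNT_ID)
+
+# Queue an asynchronous ANALYTICS job for a batch of Line Items (max 20 entity ids per job)
+params = {
+    "entity": "LINE_ITEM",
+    "entity_ids": ",".join(LINE_ITEM_IDS),
+    "metric_groups": "ENGAGEMENT",
+    "granularity": "DAY",
+    "placement": "ALL_ON_TWITTER",
+    "start_time": "2020-01-01",
+    "end_time": "2020-01-04",  # exclusive, hence one day after the requested end date
+}
+resource = f"/{API_VERSION}/stats/jobs/accounts/{account.id}"
+job = Request(client, "post", resource, params=params).perform().body["data"]
+# The reader then polls the job until its status is SUCCESS, and downloads the file at its 'url'
+```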
+ #### Parameters |CLI option|Documentation| |--|--| -|`--adobe-client-id`|Client ID, that you can find on Adobe Developer Console| -|`--adobe-client-secret`|Client Secret, that you can find on Adobe Developer Console| -|`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| -|`--adobe-org-id`|Organization ID, that you can find on Adobe Developer Console| -|`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| -|`--adobe-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| -|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to *True*, the below parameters should be left empty.| -|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| -|`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| -|`--adobe-report-metric-id`|ID of the metric to include in the report| -|`--adobe-date-granularity`|Granularity of the report. *Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS*| -|`--adobe-start-date`|Start date of the report (format: YYYY-MM-DD)| -|`--adobe-end-date`|End date of the report (format: YYYY-MM-DD)| +|`--twitter-consumer-key`|API key, available in the 'Keys and tokens' section of your Twitter Developer App.| +|`--twitter-consumer-secret`|API secret key, available in the 'Keys and tokens' section of your Twitter Developer App.| +|`--twitter-access-token`|Access token, available in the 'Keys and tokens' section of your Twitter Developer App.| +|`--twitter-access-token-secret`|Access token secret, available in the 'Keys and tokens' section of your Twitter Developer App.| +|`--twitter-account-id`|Specifies the Twitter Account ID for which the data should be returned.| +|`--twitter-report-type`|Specifies the type of report to collect. *Possible values: ANALYTICS, REACH, ENTITY.*| +|`--twitter-entity`|Specifies the entity type to retrieve data for. *Possible values: FUNDING_INSTRUMENT, CAMPAIGN, LINE_ITEM, MEDIA_CREATIVE, PROMOTED_TWEET, CARD.*| +|`--twitter-entity-attribute`|Specific to ENTITY reports. Specifies the entity attribute (configuration detail) that should be returned. *To get possible values, print the ENTITY_ATTRIBUTES variable in nck/helpers/twitter_helper.py*| +|`--twitter-granularity`|Specific to ANALYTICS reports. Specifies how granular the retrieved data should be. *Possible values: TOTAL (default), DAY.*| +|`--twitter-metric-group`|Specific to ANALYTICS reports. Specifies the list of metrics (as a group) that should be returned. *Possible values can be found [here](https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation).* | +|`--twitter-placement`|Specific to ANALYTICS reports. Scopes the retrieved data to a particular placement. *Possible values: ALL_ON_TWITTER (default), PUBLISHER_NETWORK.*| +|`--twitter-segmentation-type`|Specific to ANALYTICS reports. Specifies how the retrieved data should be segmented. *Possible values can be found [here](https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation).* | +|`--twitter-platform`|Specific to ANALYTICS reports. Required if segmentation_type is set to DEVICES or PLATFORM_VERSION. *Possible values can be identified through the GET targeting_criteria/platforms endpoint.*| +|`--twitter-country`|Specific to ANALYTICS reports. Required if segmentation_type is set to CITIES, POSTAL_CODES, or REGION.
*Possible values can be identified through the GET targeting_criteria/locations endpoint.*| +|`--twitter-start-date`|Start date of the period to request (format: YYYY-MM-DD).| +|`--twitter-end-date`|End date of the period to request (format: YYYY-MM-DD).| +|`--twitter-add-request-date-to-report`|If set to *True* (default: *False*), the date on which the request is made will appear on each report record.| + +If you need any further information, the documentation of Twitter Ads API can be found [here](https://developer.twitter.com/en/docs/ads/general/overview). To get a better understanding of **Twitter Ads Hierarchy and Terminology**, we advise you to have a look at [this page](https://developer.twitter.com/en/docs/tutorials/ads-api-hierarchy-terminology). + +## Yandex Readers + +#### Source API + +[Yandex Direct API](https://tech.yandex.com/direct/) -#### Addtional information -- **The full list of available elements and metrics** can be retrieved with the [GetElements](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetElements.md) and [GetMetrics](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetMetrics.md) methods. -- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. -- **If you need further information**, the documentation of Adobe APIs 1.4 can be found [here](https://github.com/AdobeDocs/analytics-1.4-apis). +#### How to obtain credentials -### Adobe Analytics Reader 2.0 +In order to access Yandex Direct API, you need two accounts: an advertiser account and a developer account. +Here is the process: + +1. Create a developer account if you don't already have one. Click on the *Get started* button on this [page](https://direct.yandex.com/). +2. Create and register an app that will access Yandex Direct API via [Yandex OAuth](https://oauth.yandex.com/client/new). +3. Keep your app client ID safe. Log in with your advertiser account and [give permission to the app to access your data](https://tech.yandex.com/oauth/doc/dg/tasks/get-oauth-token-docpage/). +4. Store your token very carefully. +5. Log out and log in as a developer and [ask permission to access Yandex Direct API](https://direct.yandex.com/registered/main.pl?cmd=apiSettings) (ask for Full access). Fill in the form. +6. Wait for Yandex support to reply; it should take less than a week. + +### Yandex Campaign Reader + +[Official documentation](https://tech.yandex.com/direct/doc/ref-v5/campaigns/get-docpage/) #### Quickstart -Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: +The following command retrieves the daily budget of all your campaigns, since your account creation. ``` -python nck/entrypoint.py read_adobe_2_0 --adobe-2-0-client-id --adobe-2-0-client-secret --adobe-2-0-tech-account-id --adobe-2-0-org-id --adobe-2-0-private-key --adobe-2-0-global-company-id --adobe-2-0-report-suite-id --adobe-2-0-dimension daterangeday --adobe-2-0-dimension campaign --adobe-2-0-start-date 2020-01-01 --adobe-2-0-end-date 2020-01-31 --adobe-2-0-metric visits write_console +python nck/entrypoint.py read_yandex_campaigns --yandex-token --yandex-field-name Id --yandex-field-name Name --yandex-field-name DailyBudget write_console ``` +Didn't work? See [troubleshooting](#troubleshooting) section.
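+For context, the reader wraps a plain JSON call to the Campaigns service of Yandex Direct API v5. A minimal sketch of the equivalent request with `requests` (the token placeholder is yours to fill in; field names match the quickstart above):
+
+```
+import requests
+
+response = requests.post(
+    "https://api.direct.yandex.com/json/v5/campaigns",
+    headers={"Authorization": "Bearer <your-oauth-token>", "Accept-Language": "en"},
+    json={
+        "method": "get",
+        "params": {
+            "SelectionCriteria": {},  # empty criteria selects all campaigns
+            "FieldNames": ["Id", "Name", "DailyBudget"],
+        },
+    },
+)
+campaigns = response.json()["result"]["Campaigns"]
+```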
+ #### Parameters |CLI option|Documentation| -|--| -|`--adobe-2-0-client-id`|Client ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-client-secret`|Client Secret, that you can find on Adobe Developer Console| -|`--adobe-2-0-tech-account-id`|Technical Account ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-org-id`|Organization ID, that you can find on Adobe Developer Console| -|`--adobe-2-0-private-key`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| -|`--adobe-2-0-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| -|`--adobe-2-0-report-suite-id`|ID of the requested Adobe Report Suite| -|`--adobe-2-0-dimension`|Dimension to include in the report| -|`--adobe-2-0-metric`|Metric to include in the report| -|`--adobe-2-0-start-date`|Start date of the report (format: YYYY-MM-DD)| -|`--adobe-2-0-end-date`|End date of the report (format: YYYY-MM-DD)| +|--|--| +|`--yandex-token`|Bearer token that allows you to authenticate to the API| +|`--yandex-campaign-id`|(Optional) Selects campaigns with the specified IDs.| +|`--yandex-campaign-state`|(Optional) Selects campaigns with the specified states. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).*| +|`--yandex-campaign-status`|(Optional) Selects campaigns with the specified statuses. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).*| +|`--yandex-campaign-payment-status`|(Optional) Selects campaigns with the specified payment statuses. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/objects/campaign-docpage/#status).*| +|`--yandex-field-name`|Parameters to get that are common to all types of campaigns.| -#### Additional information +### Yandex Statistics Reader -- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. -- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-dimension daterangeday` will produce a report with a day granularity. -- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. -- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). +[Official documentation](https://tech.yandex.com/direct/doc/reports/reports-docpage/) -### Troubleshooting #### Quickstart -You encountered and you don't know what 's going on. You may find an answer in the troubleshooting guide below.
+The following command retrieves a performance report for all your campaigns, since your account creation. + +``` +python nck/entrypoint.py read_yandex_statistics --yandex-token --yandex-report-type AD_PERFORMANCE_REPORT --yandex-field-name AdFormat --yandex-field-name AdId --yandex-field-name Impressions --yandex-include-vat True --yandex-report-language en --yandex-field-name AdGroupName --yandex-field-name AdGroupId --yandex-field-name AdNetworkType --yandex-field-name CampaignId --yandex-field-name CampaignName --yandex-field-name CampaignType --yandex-field-name Date --yandex-field-name Device --yandex-field-name Clicks --yandex-field-name Conversions --yandex-field-name Cost --yandex-date-range ALL_TIME write_console +``` -1. **Have you install NCK dependencies?** In order to run NCK, you need to install all dependencies. First create a [virtual environment](https://docs.python.org/3/library/venv.html) and then run `pip install -r requirements.txt`. +Didn't work? See [troubleshooting](#troubleshooting) section. + +#### Parameters + +Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/). + +|CLI option|Documentation| +|--|--| +|`--yandex-token`|Bearer token that allows you to authenticate to the API| +|`--yandex-report-language`|(Optional) Language of the report. *Possible values can be found [here](https://tech.yandex.com/direct/doc/dg/concepts/headers-docpage/#headers__accept-language).*| +|`--yandex-filter`|(Optional) Filters on a particular field.| +|`--yandex-max-rows`|(Optional) The maximum number of rows in the report.| +|`--yandex-field-name`|Information you want to collect. *Possible values can be found [here](https://tech.yandex.com/direct/doc/reports/fields-list-docpage/).*| +|`--yandex-report-type`|Type of report. Linked to the fields you want to select.| +|`--yandex-date-range`|*Possible values can be found [here](https://tech.yandex.com/direct/doc/reports/period-docpage/).*| +|`--yandex-include-vat`|Adds VAT to your expenses if set to `True`| +|`--yandex-date-start`|(Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`.| +|`--yandex-date-stop`|(Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`.| + +## Troubleshooting + +You encountered an issue when running a Reader command and you don't know what's going on? +You may find an answer in the troubleshooting guide below. + +1. **Have you installed NCK dependencies?** In order to run NCK, you need to install all dependencies. First create a [virtual environment](https://docs.python.org/3/library/venv.html) and then run `pip install -r requirements.txt`. 2. **Have you set `PYTHONPATH` environment variable to the root of NCK folder?** -3. **Have you checked logs?** The code has been implmented so that every error is logged. For example, if you did not provide a valid token, you will see something like ```Invalid request. +3. **Have you checked logs?** The code has been implemented so that every error is logged. For example, if you did not provide a valid token, you will see something like ```Invalid request. {'error': {'error_code': '53', 'request_id': '8998435864716615689', 'error_string': 'Authorization error', 'error_detail': 'Invalid OAuth token'}}```. If you misspelled a field, you will get a message like this one: ```Error: Invalid value for "--yandex-field-name"```.
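+As a quick sanity check for points 1 and 2, the following hypothetical Python session should run without errors from the root of the NCK folder once your environment is set up:
+
+```
+import sys
+
+# The NCK root should appear in sys.path if PYTHONPATH is set correctly
+print(sys.path)
+
+import nck.entrypoint  # a ModuleNotFoundError here means PYTHONPATH is not set to the NCK root
+```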
diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 669688d6..b177167b 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +from nck.readers.dv360_reader import dv360 from nck.readers.reader import Reader from nck.readers.mysql_reader import mysql @@ -26,6 +27,8 @@ from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce from nck.readers.facebook_reader import facebook +from nck.readers.ttd_reader import the_trade_desk +from nck.readers.twitter_reader import twitter from nck.readers.dbm_reader import dbm from nck.readers.dcm_reader import dcm from nck.readers.ga_reader import ga @@ -35,6 +38,7 @@ from nck.readers.radarly_reader import radarly from nck.readers.yandex_campaign_reader import yandex_campaigns from nck.readers.yandex_statistics_reader import yandex_statistics +from nck.readers.gs_reader import google_sheets readers = [ mysql, @@ -45,7 +49,10 @@ s3, sa360_reader, facebook, + the_trade_desk, + twitter, oracle, + dv360, dbm, dcm, ga, @@ -54,7 +61,8 @@ adobe_2_0, radarly, yandex_campaigns, - yandex_statistics + yandex_statistics, + google_sheets ] diff --git a/nck/readers/dbm_reader.py b/nck/readers/dbm_reader.py index 8fd47359..cb7aa80a 100644 --- a/nck/readers/dbm_reader.py +++ b/nck/readers/dbm_reader.py @@ -22,20 +22,20 @@ import requests import datetime -from itertools import chain - from googleapiclient import discovery from oauth2client import client, GOOGLE_REVOKE_URI from tenacity import retry, wait_exponential, stop_after_delay +from click import ClickException from nck.commands.command import processor from nck.readers.reader import Reader from nck.utils.args import extract_args from nck.streams.format_date_stream import FormatDateStream -from nck.utils.text import get_generator_dict_from_str_csv, add_column_value_to_csv_line_iterator +from nck.utils.text import get_report_generator_from_flat_file, skip_last +from nck.utils.date_handler import get_date_start_and_date_stop_from_range -from nck.helpers.dbm_helper import POSSIBLE_REQUEST_TYPES, FILE_TYPES_DICT +from nck.helpers.dbm_helper import POSSIBLE_REQUEST_TYPES DISCOVERY_URI = "https://analyticsreporting.googleapis.com/$discovery/rest" @@ -50,7 +50,7 @@ @click.option("--dbm-client-secret", required=True) @click.option("--dbm-query-metric", multiple=True) @click.option("--dbm-query-dimension", multiple=True) -@click.option("--dbm-request-type", type=click.Choice(POSSIBLE_REQUEST_TYPES)) +@click.option("--dbm-request-type", type=click.Choice(POSSIBLE_REQUEST_TYPES), required=True) @click.option("--dbm-query-id") @click.option("--dbm-query-title") @click.option("--dbm-query-frequency", default="ONE_TIME") @@ -64,9 +64,9 @@ help=( "Sometimes the date range on which metrics are computed is missing from the report. " "If this option is set to True, this range will be added." 
- ) + ), ) -@click.option("--dbm-filter", type=click.Tuple([str, int]), multiple=True) +@click.option("--dbm-filter", type=click.Tuple([str, str]), multiple=True) @click.option("--dbm-file-type", multiple=True) @click.option( "--dbm-date-format", @@ -78,7 +78,9 @@ "--dbm-day-range", required=True, default="LAST_7_DAYS", - type=click.Choice(["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"]), + type=click.Choice( + ["PREVIOUS_DAY", "LAST_30_DAYS", "LAST_90_DAYS", "LAST_7_DAYS", "PREVIOUS_MONTH", "PREVIOUS_WEEK"] + ), ) @processor("dbm_access_token", "dbm_refresh_token", "dbm_client_secret") def dbm(**kwargs): @@ -88,7 +90,7 @@ def dbm(**kwargs): class DbmReader(Reader): API_NAME = "doubleclickbidmanager" - API_VERSION = "v1" + API_VERSION = "v1.1" def __init__(self, access_token, refresh_token, client_secret, client_id, **kwargs): credentials = client.GoogleCredentials( @@ -105,32 +107,27 @@ def __init__(self, access_token, refresh_token, client_secret, client_id, **kwar http = credentials.authorize(httplib2.Http()) credentials.refresh(http) - # API_SCOPES = ['https://www.googleapis.com/auth/doubleclickbidmanager'] self._client = discovery.build(self.API_NAME, self.API_VERSION, http=http, cache_discovery=False) self.kwargs = kwargs - def get_query(self, query_id, query_title): - response = self._client.queries().listqueries().execute() - if "queries" in response: - for q in response["queries"]: - if q["queryId"] == query_id or q["metadata"]["title"] == query_title: - return q + def get_query(self, query_id): + if query_id: + return self._client.queries().getquery(queryId=query_id).execute() else: - logging.info("No query found with the id {} or the title {}".format(query_id, query_title)) - return None + raise ClickException("Please provide a 'query_id' in order to find your query") def get_existing_query(self): query_id = self.kwargs.get("query_id", None) - query_title = self.kwargs.get("query_title", None) - query = self.get_query(query_id, query_id) + query = self.get_query(query_id) if query: return query else: - raise Exception("No query found with the id {} or the title {}".format(query_id, query_title)) + raise ClickException(f"No query found with the id {query_id}") def get_query_body(self): body_q = { + "kind": "doubleclickbidmanager#query", "metadata": { "format": "CSV", "title": self.kwargs.get("query_title", "NO_TITLE_GIVEN"), @@ -138,8 +135,8 @@ def get_query_body(self): }, "params": { "type": self.kwargs.get("query_param_type", "TYPE_TRUEVIEW"), - "groupBys": self.kwargs.get("query_dimension"), - "metrics": self.kwargs.get("query_metric"), + "groupBys": list(self.kwargs.get("query_dimension", [])), + "metrics": list(self.kwargs.get("query_metric", [])), "filters": [{"type": filt[0], "value": str(filt[1])} for filt in self.kwargs.get("filter")], }, "schedule": {"frequency": self.kwargs.get("query_frequency", "ONE_TIME")}, @@ -159,16 +156,14 @@ def create_and_get_query(self): query = self._client.queries().createquery(body=body_query).execute() return query - @retry( - wait=wait_exponential(multiplier=1, min=60, max=3600), - stop=stop_after_delay(36000), - ) + @retry(wait=wait_exponential(multiplier=1, min=60, max=3600), stop=stop_after_delay(36000)) def _wait_for_query(self, query_id): - logging.info( - "waiting for query of id : {} to complete running".format(query_id) - ) - query_infos = self.get_query(query_id, None) - if query_infos["metadata"]["running"]: + logging.info("waiting for query of id : {} to complete 
running".format(query_id)) + query_infos = self.get_query(query_id) + if query_infos["metadata"]["running"] or ( + "googleCloudStoragePathForLatestReport" not in query_infos["metadata"] + and "googleDrivePathForLatestReport" not in query_infos["metadata"] + ): raise Exception("Query still running.") else: return query_infos @@ -181,7 +176,10 @@ def get_query_report_url(self, existing_query=True): query_id = query_infos["queryId"] query_infos = self._wait_for_query(query_id) - if query_infos["metadata"]["googleCloudStoragePathForLatestReport"]: + if ( + "googleCloudStoragePathForLatestReport" in query_infos["metadata"] + and len(query_infos["metadata"]["googleCloudStoragePathForLatestReport"]) > 0 + ): url = query_infos["metadata"]["googleCloudStoragePathForLatestReport"] else: url = query_infos["metadata"]["googleDrivePathForLatestReport"] @@ -191,16 +189,19 @@ def get_query_report_url(self, existing_query=True): def get_query_report(self, existing_query=True): url = self.get_query_report_url(existing_query) report = requests.get(url, stream=True) - if self.kwargs["query_param_type"] == "TYPE_REACH_AND_FREQUENCY" \ - and self.kwargs["add_date_to_report"]: - return get_generator_dict_from_str_csv( - report.iter_lines(), - add_date=True, - day_range=self.kwargs["day_range"], - date_format=self.kwargs.get("date_format") + if self.kwargs["query_param_type"] == "TYPE_REACH_AND_FREQUENCY" and self.kwargs["add_date_to_report"]: + start, stop = get_date_start_and_date_stop_from_range(self.kwargs["day_range"]) + column_dict = { + "date_start": start.strftime(self.kwargs.get("date_format")), + "date_stop": stop.strftime(self.kwargs.get("date_format")), + } + report_gen = get_report_generator_from_flat_file( + report.iter_lines(), add_column=True, column_dict=column_dict ) + return skip_last(report_gen, 1) else: - return get_generator_dict_from_str_csv(report.iter_lines()) + report_gen = get_report_generator_from_flat_file(report.iter_lines()) + return skip_last(report_gen, 1) def list_query_reports(self): reports_infos = self._client.reports().listreports(queryId=self.kwargs.get("query_id")).execute() @@ -224,37 +225,9 @@ def get_lineitems_objects(self): response = self._client.lineitems().downloadlineitems(body=body_lineitems).execute() lineitems = response["lineItems"] lines = lineitems.split("\n") - return get_generator_dict_from_str_csv(lines) - - def get_sdf_body(self): - filter_types = [filt[0] for filt in self.kwargs.get("filter")] - assert ( - len([filter_types[0] == filt for filt in filter_types if filter_types[0] == filt]) == 1 - ), "sdf accept just one filter type, multiple filter types detected" - filter_ids = [str(filt[1]) for filt in self.kwargs.get("filter")] - - file_types = self.kwargs.get("file_type") - body_sdf = {"version": "5.1", "filterIds": filter_ids, "filterType": filter_types, "fileTypes": file_types} - return body_sdf - - def get_sdf_objects(self): - body_sdf = self.get_sdf_body() - file_types = body_sdf["fileTypes"] - response = self._client.sdf().download(body=body_sdf).execute() - - return chain( - *[ - get_generator_dict_from_str_csv( - add_column_value_to_csv_line_iterator( - response[FILE_TYPES_DICT[file_type]].split("\n"), "file_type", file_type - ) - ) - for file_type in file_types - ] - ) + return get_report_generator_from_flat_file(lines, skip_n_last=1) def read(self): - # request existing query request_type = self.kwargs.get("request_type") if request_type == "existing_query": data = [self.get_existing_query()] @@ -268,19 +241,10 @@ def read(self): data = 
self.list_query_reports() elif request_type == "lineitems_objects": data = self.get_lineitems_objects() - elif request_type == "sdf_objects": - data = self.get_sdf_objects() - else: - raise Exception("Unknown request type") def result_generator(): for record in data: yield record # should replace results later by a good identifier - yield FormatDateStream( - "results", - result_generator(), - keys=["Date"], - date_format=self.kwargs.get("date_format"), - ) + yield FormatDateStream("results", result_generator(), keys=["Date"], date_format=self.kwargs.get("date_format")) diff --git a/nck/readers/dv360_reader.py b/nck/readers/dv360_reader.py new file mode 100644 index 00000000..960f01c8 --- /dev/null +++ b/nck/readers/dv360_reader.py @@ -0,0 +1,181 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import click +import logging +import io +import httplib2 + +from itertools import chain +from typing import List + +from googleapiclient import discovery +from googleapiclient.http import MediaIoBaseDownload +from oauth2client import client, GOOGLE_REVOKE_URI +from tenacity import retry, wait_exponential, stop_after_delay + +from nck.helpers.dv360_helper import FILE_NAMES, FILE_TYPES, FILTER_TYPES +from nck.utils.exceptions import RetryTimeoutError, SdfOperationError +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.file_reader import sdf_to_njson_generator, unzip +from nck.utils.args import extract_args +from nck.streams.format_date_stream import FormatDateStream + + +@click.command(name="read_dv360") +@click.option("--dv360-access-token", default=None, required=True) +@click.option("--dv360-refresh-token", required=True) +@click.option("--dv360-client-id", required=True) +@click.option("--dv360-client-secret", required=True) +@click.option("--dv360-advertiser-id", required=True) +@click.option("--dv360-file-type", type=click.Choice(FILE_TYPES), multiple=True, required=True) +@click.option("--dv360-filter-type", type=click.Choice(FILTER_TYPES), required=True) +@processor("dv360_access_token", "dv360_refresh_token", "dv360_client_secret") +def dv360(**kwargs): + return DV360Reader(**extract_args("dv360_", kwargs)) + + +class DV360Reader(Reader): + + API_NAME = "displayvideo" + API_VERSION = "v1" + SDF_VERSION = "SDF_VERSION_5_2" + + # Path where the SDF file will be downloaded. + BASE = "/tmp" + + # Name of the downloaded archive, which may embed several CSV files + # if more than one file type was provided.
+ ARCHIVE_NAME = "sdf" + + def __init__( + self, + access_token: str, + refresh_token: str, + client_id: str, + client_secret: str, + **kwargs + ): + + credentials = client.GoogleCredentials( + access_token, + client_id=client_id, + client_secret=client_secret, + refresh_token=refresh_token, + token_expiry=None, + token_uri="https://www.googleapis.com/oauth2/v4/token", + user_agent=None, + revoke_uri=GOOGLE_REVOKE_URI + ) + http = credentials.authorize(httplib2.Http()) + credentials.refresh(http) + + self._client = discovery.build( + self.API_NAME, self.API_VERSION, http=http, cache_discovery=False + ) + + self.kwargs = kwargs + self.file_names = self.get_file_names() + + def get_file_names(self) -> List[str]: + """ + The DV360 API creates one file per file_type: + maps each file_type to the name of the generated file. + """ + return [f"SDF-{FILE_NAMES[file_type]}" for file_type in self.kwargs.get("file_type")] + + @retry( + wait=wait_exponential(multiplier=1, min=60, max=3600), + stop=stop_after_delay(36000), + ) + def _wait_sdf_download_request(self, operation): + """ + Wait for an SDF task to complete (i.e. for the file to be ready for download). + Args: + operation (dict): task metadata + Returns: + operation (dict): task metadata updated with resource location. + """ + logging.info( + f"waiting for SDF operation: {operation['name']} to complete running." + ) + get_request = self._client.sdfdownloadtasks().operations().get(name=operation["name"]) + operation = get_request.execute() + if "done" not in operation: + raise RetryTimeoutError("The operation has taken more than 10 hours to complete.\n") + return operation + + def create_sdf_task(self, body): + """ + Create an asynchronous SDF task of type googleapiclient.discovery.Resource. + Args: + body (dict): request body describing the data within the generated SDF file. + Returns: + operation (dict): contains the task metadata. + """ + + operation = self._client.sdfdownloadtasks().create(body=body).execute() + logging.info("Operation %s was created." % operation["name"]) + return operation + + def download_sdf(self, operation): + request = self._client.media().download(resourceName=operation["response"]["resourceName"]) + request.uri = request.uri.replace("?alt=json", "?alt=media") + sdf = io.FileIO(f"{self.BASE}/{self.ARCHIVE_NAME}.zip", mode="wb") + downloader = MediaIoBaseDownload(sdf, request) + done = False + while done is False: + status, done = downloader.next_chunk() + logging.info(f"Download {int(status.progress() * 100)}%.") + + def get_sdf_body(self): + return { + "parentEntityFilter": { + "fileType": self.kwargs.get("file_type"), + "filterType": self.kwargs.get("filter_type") + }, + "version": self.SDF_VERSION, + "advertiserId": self.kwargs.get("advertiser_id") + } + + def get_sdf_objects(self): + body = self.get_sdf_body() + init_operation = self.create_sdf_task(body=body) + created_operation = self._wait_sdf_download_request(init_operation) + if "error" in created_operation: + raise SdfOperationError("The operation finished in error with code %s: %s" % ( + created_operation["error"]["code"], + created_operation["error"]["message"])) + self.download_sdf(created_operation) + unzip(f"{self.BASE}/{self.ARCHIVE_NAME}.zip", output_path=self.BASE) + + # Chain the per-file generators if several file_types were provided.
+ return chain( + *[ + sdf_to_njson_generator(f"{self.BASE}/{file_name}.csv") + for file_name in self.file_names + ] + ) + + def read(self): + yield FormatDateStream( + "sdf", + self.get_sdf_objects(), + keys=["Date"], + date_format=self.kwargs.get("date_format"), + ) diff --git a/nck/readers/googleads_reader.py b/nck/readers/googleads_reader.py index 5693fe71..a331f6c5 100644 --- a/nck/readers/googleads_reader.py +++ b/nck/readers/googleads_reader.py @@ -26,6 +26,7 @@ from click import ClickException from googleads import adwords from googleads.oauth2 import GoogleRefreshTokenClient +from googleads.errors import AdWordsReportBadRequestError from nck.readers.reader import Reader from nck.utils.args import extract_args @@ -172,24 +173,30 @@ def valid_client_customer_id(client_customer_id): def fetch_report_from_gads_client_customer_obj( self, report_definition, client_customer_id ): - if self.valid_client_customer_id(client_customer_id): - adwords_client = self.init_adwords_client(client_customer_id) - report_downloader = adwords_client.GetReportDownloader() - customer_report = report_downloader.DownloadReportAsStream( - report_definition, - client_customer_id=client_customer_id, - include_zero_impressions=self.include_zero_impressions, - skip_report_header=True, - skip_column_header=True, - skip_report_summary=True, - ) - else: + if not self.valid_client_customer_id(client_customer_id): raise ClickException( - "Wrong format: " - + client_customer_id - + ". Client customer ID should be in the form 123-456-7890" + f"Wrong format: {client_customer_id}. Client customer ID should be in the form 123-456-7890." ) - return customer_report + else: + try: + adwords_client = self.init_adwords_client(client_customer_id) + report_downloader = adwords_client.GetReportDownloader() + customer_report = report_downloader.DownloadReportAsStream( + report_definition, + client_customer_id=client_customer_id, + include_zero_impressions=self.include_zero_impressions, + skip_report_header=True, + skip_column_header=True, + skip_report_summary=True, + ) + return customer_report + except AdWordsReportBadRequestError as e: + if e.type == "AuthorizationError.CUSTOMER_NOT_ACTIVE": + logging.warning( + f"Skipping clientCustomerId {client_customer_id} (inactive)." + ) + else: + raise Exception(f"Wrong request. Error type: {e.type}") def get_customer_ids(self, manager_id): """Retrieves all CustomerIds in the account hierarchy. 
@@ -214,7 +221,7 @@ def get_customer_ids(self, manager_id): selector = { "fields": ["CustomerId"], "predicates": [ - {"field": "CanManageClients", "operator": "EQUALS", "values": [False]} + {"field": "CanManageClients", "operator": "EQUALS", "values": [False]}, ], "paging": {"startIndex": str(offset), "numberResults": str(PAGE_SIZE)}, } @@ -346,19 +353,18 @@ def format_and_yield(self): customer_report = self.fetch_report_from_gads_client_customer_obj( report_definition, googleads_account_id ) - customer_report = stream_reader(customer_report) - - for row in customer_report: - reader = csv.DictReader(StringIO(row), self.fields) - for row in reader: - if self.include_client_customer_id: - row['AccountId'] = googleads_account_id - - if self.filter_on_video_campaigns: - if row['CampaignId'] in video_campaign_ids: + if customer_report: + customer_report = stream_reader(customer_report) + for row in customer_report: + reader = csv.DictReader(StringIO(row), self.fields) + for row in reader: + if self.include_client_customer_id: + row["AccountId"] = googleads_account_id + if self.filter_on_video_campaigns: + if row["CampaignId"] in video_campaign_ids: + yield row + else: yield row - else: - yield row def read(self): if self.manager_id: diff --git a/nck/readers/gs_reader.py b/nck/readers/gs_reader.py new file mode 100644 index 00000000..9b553814 --- /dev/null +++ b/nck/readers/gs_reader.py @@ -0,0 +1,131 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import click +import gspread +from google.auth.transport.requests import AuthorizedSession +from google.oauth2 import service_account + +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.utils.args import extract_args +from nck.streams.json_stream import JSONStream + + +@click.command(name="read_gs") +@click.option( + "--gs-project-id", + required=True, + help="Project ID that is given by Google services once you have \ + created your project in the Google Cloud console. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-private-key-id", + required=True, + help="Private key ID given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-private-key", + required=True, + help="The private key given by Google services once you have added credentials \ + to the project. \ + You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-client-email", + required=True, + help="Client e-mail given by Google services once you have added credentials \ + to the project.
You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-client-id", + required=True, + help="Client ID given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option( + "--gs-client-cert", + required=True, + help="Client certificate given by Google services once you have added credentials \ + to the project. You can retrieve it in the JSON credential file", +) +@click.option("--gs-sheet-key", required=True, help="Google spreadsheet key that is available in the URL") +@click.option( + "--gs-page-number", + default=0, + type=click.INT, + help="The page number you want to access.\ + Page numbering starts at 0", +) +@processor("gs_private_key_id", "gs_private_key", "gs_client_id", "gs_client_cert") +def google_sheets(**kwargs): + return GSheetsReader(**extract_args("gs_", kwargs)) + + +class GSheetsReader(Reader): + _scopes = [ + "https://www.googleapis.com/auth/spreadsheets.readonly", + "https://www.googleapis.com/auth/spreadsheets", + "https://www.googleapis.com/auth/drive.file", + "https://www.googleapis.com/auth/drive", + ] + + def __init__( + self, + project_id: str, + private_key_id: str, + private_key: str, + client_email: str, + client_id: str, + client_cert: str, + sheet_key: str, + page_number: int, + ): + self._sheet_key = sheet_key + self._page_number = page_number + credentials = self.__init_credentials( + project_id, private_key_id, private_key, client_email, client_id, client_cert + ) + scoped_credentials = credentials.with_scopes(self._scopes) + self._gc = gspread.Client(auth=scoped_credentials) + self._gc.session = AuthorizedSession(scoped_credentials) + + def __init_credentials(self, project_id, private_key_id, private_key, client_email, client_id, client_cert): + keyfile_dict = { + "type": "service_account", + "project_id": project_id, + "private_key_id": private_key_id, + "private_key": private_key.replace("\\n", "\n"), + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "client_email": client_email, + "client_id": client_id, + "client_x509_cert_url": client_cert, + "token_uri": "https://accounts.google.com/o/oauth2/token", + } + return service_account.Credentials.from_service_account_info(info=keyfile_dict) + + def read(self): + sheet = self._gc.open_by_key(self._sheet_key).get_worksheet(self._page_number) + list_of_hashes = sheet.get_all_records() + + def result_generator(): + for record in list_of_hashes: + yield record + + yield JSONStream("gsheet", result_generator()) diff --git a/nck/readers/sa360_reader.py b/nck/readers/sa360_reader.py index 1d7b1e30..3ff2d16f 100644 --- a/nck/readers/sa360_reader.py +++ b/nck/readers/sa360_reader.py @@ -23,7 +23,7 @@ from nck.clients.sa360_client import SA360Client from nck.helpers.sa360_helper import REPORT_TYPES from nck.utils.args import extract_args -from nck.utils.text import get_generator_dict_from_str_csv +from nck.utils.text import get_report_generator_from_flat_file DATEFORMAT = "%Y-%m-%d" ENCODING = "utf-8" @@ -42,9 +42,14 @@ help="If empty, all advertisers from agency will be requested", ) @click.option("--sa360-report-name", default="SA360 Report") -@click.option("--sa360-report-type", type=click.Choice(REPORT_TYPES), default=REPORT_TYPES[0]) @click.option( - "--sa360-column", "sa360_columns", multiple=True, help="https://developers.google.com/search-ads/v2/report-types" + "--sa360-report-type", type=click.Choice(REPORT_TYPES),
default=REPORT_TYPES[0] +) +@click.option( + "--sa360-column", + "sa360_columns", + multiple=True, + help="https://developers.google.com/search-ads/v2/report-types", ) @click.option( "--sa360-saved-column", @@ -75,7 +80,9 @@ def __init__( start_date, end_date, ): - self.sa360_client = SA360Client(access_token, client_id, client_secret, refresh_token) + self.sa360_client = SA360Client( + access_token, client_id, client_secret, refresh_token + ) self.agency_id = agency_id self.advertiser_ids = list(advertiser_ids) self.report_name = report_name @@ -102,11 +109,17 @@ def result_generator(self): report_data = self.sa360_client.assert_report_file_ready(report_id) - for report_generator in self.sa360_client.download_report_files(report_data, report_id): - yield from get_generator_dict_from_str_csv(report_generator, skip_last_row=False) + for line_iterator in self.sa360_client.download_report_files( + report_data, report_id + ): + yield from get_report_generator_from_flat_file(line_iterator) def read(self): if not self.advertiser_ids: - self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(self.agency_id) + self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency( + self.agency_id + ) - yield NormalizedJSONStream("results" + "_".join(self.advertiser_ids), self.result_generator()) + yield NormalizedJSONStream( + "results" + "_".join(self.advertiser_ids), self.result_generator() + ) diff --git a/nck/readers/ttd_reader.py b/nck/readers/ttd_reader.py new file mode 100644 index 00000000..fad74013 --- /dev/null +++ b/nck/readers/ttd_reader.py @@ -0,0 +1,236 @@ +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +import logging +import click +from click import ClickException +import requests +from datetime import timedelta +from tenacity import retry, wait_exponential, stop_after_delay + +from nck.utils.args import extract_args +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.streams.json_stream import JSONStream +from nck.streams.normalized_json_stream import NormalizedJSONStream +from nck.helpers.ttd_helper import ( + API_HOST, + API_ENDPOINTS, + DEFAULT_REPORT_SCHEDULE_ARGS, + DEFAULT_PAGING_ARGS, + ReportTemplateNotFoundError, + ReportScheduleNotReadyError, + format_date, +) +from nck.utils.text import get_report_generator_from_flat_file + + +@click.command(name="read_ttd") +@click.option("--ttd-login", required=True, help="Login of your API account") +@click.option("--ttd-password", required=True, help="Password of your API account") +@click.option( + "--ttd-advertiser-id", + required=True, + multiple=True, + help="Advertiser Ids for which report data should be fetched", +) +@click.option( + "--ttd-report-template-name", + required=True, + help="Exact name of the Report Template to request. 
Existing Report Templates " + "can be found within the MyReports section of The Trade Desk UI.", ) +@click.option( + "--ttd-report-schedule-name", + required=True, + help="Name of the Report Schedule to create.", ) +@click.option( + "--ttd-start-date", + required=True, + type=click.DateTime(), + help="Start date of the period to request (format: YYYY-MM-DD)", ) +@click.option( + "--ttd-end-date", + required=True, + type=click.DateTime(), + help="End date of the period to request (format: YYYY-MM-DD)", ) +@click.option( + "--ttd-normalize-stream", + type=click.BOOL, + default=False, + help="If set to True, yields a NormalizedJSONStream (spaces and special " + "characters replaced by '_' in field names, which is useful for BigQuery). " + "Else, yields a standard JSONStream.", ) +@processor("ttd_login", "ttd_password") +def the_trade_desk(**kwargs): + return TheTradeDeskReader(**extract_args("ttd_", kwargs)) + + +class TheTradeDeskReader(Reader): + def __init__( + self, + login, + password, + advertiser_id, + report_template_name, + report_schedule_name, + start_date, + end_date, + normalize_stream + ): + self.login = login + self.password = password + self._build_headers() + self.advertiser_ids = list(advertiser_id) + self.report_template_name = report_template_name + self.report_schedule_name = report_schedule_name + self.start_date = start_date + # Report end date is exclusive: to become inclusive, it should be incremented by 1 day + self.end_date = end_date + timedelta(days=1) + self.normalize_stream = normalize_stream + + self._validate_dates() + + def _validate_dates(self): + if self.end_date - timedelta(days=1) < self.start_date: + raise ClickException( + "Report end date should be equal to or later than report start date." + ) + + def _get_access_token(self): + url = f"{API_HOST}/authentication" + headers = {"Content-Type": "application/json"} + payload = { + "Login": self.login, + "Password": self.password, + "TokenExpirationInMinutes": 1440, + } + response = requests.post(url=url, headers=headers, json=payload) + if response.ok: + return response.json()["Token"] + else: + response.raise_for_status() + + def _build_headers(self): + self.headers = {"Content-Type": "application/json", "TTD-Auth": self._get_access_token()} + + def _make_api_call(self, method, endpoint, payload={}): + url = f"{API_HOST}/{endpoint}" + response = requests.request( + method=method, url=url, headers=self.headers, json=payload + ) + if response.ok: + if response.content: + return response.json() + else: + response.raise_for_status() + + def _get_report_template_id(self): + logging.info(f"Collecting ReportTemplateId of '{self.report_template_name}'") + method, endpoint = API_ENDPOINTS["get_report_template_id"] + payload = {"NameContains": self.report_template_name, **DEFAULT_PAGING_ARGS} + json_response = self._make_api_call(method, endpoint, payload) + if json_response["ResultCount"] == 0: + raise ReportTemplateNotFoundError( + f"No existing ReportTemplate matches '{self.report_template_name}'" + ) + if json_response["ResultCount"] > 1: + raise ReportTemplateNotFoundError( + f"""'{self.report_template_name}' matches more than one ReportTemplate.
+ Please specify the exact name of the ReportTemplate you wish to retrieve.""" + ) + else: + self.report_template_id = json_response["Result"][0]["ReportTemplateId"] + logging.info(f"Retrieved ReportTemplateId: {self.report_template_id}") + + def _create_report_schedule(self): + method, endpoint = API_ENDPOINTS["create_report_schedule"] + payload = { + "ReportScheduleName": self.report_schedule_name, + "ReportTemplateId": self.report_template_id, + "AdvertiserFilters": self.advertiser_ids, + "ReportStartDateInclusive": self.start_date.isoformat(), + "ReportEndDateExclusive": self.end_date.isoformat(), + **DEFAULT_REPORT_SCHEDULE_ARGS, + } + logging.info(f"Creating ReportSchedule: {payload}") + json_response = self._make_api_call(method, endpoint, payload) + self.report_schedule_id = json_response["ReportScheduleId"] + + @retry( + wait=wait_exponential(multiplier=1, min=60, max=3600), + stop=stop_after_delay(36000), + ) + def _wait_for_download_url(self): + report_execution_details = self._get_report_execution_details() + if report_execution_details["ReportExecutionState"] == "Pending": + raise ReportScheduleNotReadyError( + f"ReportSchedule '{self.report_schedule_id}' is still running." + ) + else: + # As the ReportSchedule that we just created runs only once, + # the API response will include only one ReportDelivery (so we can get index "[0]") + self.download_url = report_execution_details["ReportDeliveries"][0][ + "DownloadURL" + ] + logging.info( + f"ReportScheduleId '{self.report_schedule_id}' is ready. DownloadURL: {self.download_url}" + ) + + def _get_report_execution_details(self): + method, endpoint = API_ENDPOINTS["get_report_execution_details"] + payload = { + "AdvertiserIds": self.advertiser_ids, + "ReportScheduleIds": [self.report_schedule_id], + **DEFAULT_PAGING_ARGS, + } + json_response = self._make_api_call(method, endpoint, payload) + # As the ReportScheduleId that we provided as a payload is globally unique, + # the API response will include only one Result (so we can get index "[0]") + report_execution_details = json_response["Result"][0] + return report_execution_details + + def _download_report(self): + report = requests.get(url=self.download_url, headers=self.headers, stream=True) + return get_report_generator_from_flat_file(report.iter_lines()) + + def _delete_report_schedule(self): + logging.info(f"Deleting ReportScheduleId '{self.report_schedule_id}'") + method, endpoint = API_ENDPOINTS["delete_report_schedule"] + self._make_api_call(method, f"{endpoint}/{self.report_schedule_id}") + + def read(self): + self._get_report_template_id() + self._create_report_schedule() + self._wait_for_download_url() + data = self._download_report() + + def result_generator(): + for record in data: + yield { + k: format_date(v) if k == "Date" else v for k, v in record.items() + } + + if self.normalize_stream: + yield NormalizedJSONStream( + "results_" + "_".join(self.advertiser_ids), result_generator() + ) + else: + yield JSONStream( + "results_" + "_".join(self.advertiser_ids), result_generator() + ) + + self._delete_report_schedule() diff --git a/nck/readers/twitter_reader.py b/nck/readers/twitter_reader.py new file mode 100644 index 00000000..d82c6014 --- /dev/null +++ b/nck/readers/twitter_reader.py @@ -0,0 +1,585 @@ +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+import logging
+import click
+from click import ClickException
+from itertools import chain
+from datetime import datetime, timedelta
+from tenacity import retry, wait_exponential, stop_after_delay
+
+from nck.utils.args import extract_args
+from nck.commands.command import processor
+from nck.readers.reader import Reader
+from nck.streams.json_stream import JSONStream
+from nck.helpers.twitter_helper import (
+    REPORT_TYPES,
+    ENTITY_OBJECTS,
+    ENTITY_ATTRIBUTES,
+    GRANULARITIES,
+    METRIC_GROUPS,
+    PLACEMENTS,
+    SEGMENTATION_TYPES,
+)
+
+from twitter_ads.client import Client
+from twitter_ads.utils import split_list
+from twitter_ads import API_VERSION
+from twitter_ads.http import Request
+from twitter_ads.cursor import Cursor
+
+from twitter_ads.creative import CardsFetch
+
+API_DATEFORMAT = "%Y-%m-%dT%H:%M:%SZ"
+REP_DATEFORMAT = "%Y-%m-%d"
+MAX_ENTITY_IDS_PER_JOB = 20
+MAX_CONCURRENT_JOBS = 100
+
+
+@click.command(name="read_twitter")
+@click.option(
+    "--twitter-consumer-key",
+    required=True,
+    help="API key, available in the 'Keys and tokens' section of your Twitter Developer App.",
+)
+@click.option(
+    "--twitter-consumer-secret",
+    required=True,
+    help="API secret key, available in the 'Keys and tokens' section of your Twitter Developer App.",
+)
+@click.option(
+    "--twitter-access-token",
+    required=True,
+    help="Access token, available in the 'Keys and tokens' section of your Twitter Developer App.",
+)
+@click.option(
+    "--twitter-access-token-secret",
+    required=True,
+    help="Access token secret, available in the 'Keys and tokens' section of your Twitter Developer App.",
+)
+@click.option(
+    "--twitter-account-id",
+    required=True,
+    help="Specifies the Twitter Account ID for which the data should be returned.",
+)
+@click.option(
+    "--twitter-report-type",
+    required=True,
+    type=click.Choice(REPORT_TYPES),
+    help="Specifies the type of report to collect: "
+    "ANALYTICS (performance report, any kind of metrics), "
+    "REACH (performance report, focus on reach and frequency metrics), "
+    "ENTITY (entity configuration report)",
+)
+@click.option(
+    "--twitter-entity",
+    required=True,
+    type=click.Choice(list(ENTITY_ATTRIBUTES.keys())),
+    help="Specifies the entity type to retrieve data for.",
+)
+@click.option(
+    "--twitter-entity-attribute",
+    multiple=True,
+    help="Specific to 'ENTITY' reports. "
+    "Specifies the entity attribute (a.k.a. dimension) that should be returned.",
+)
+@click.option(
+    "--twitter-granularity",
+    type=click.Choice(GRANULARITIES),
+    default="TOTAL",
+    help="Specific to 'ANALYTICS' reports. Specifies how granular the retrieved data should be.",
+)
+@click.option(
+    "--twitter-metric-group",
+    multiple=True,
+    type=click.Choice(METRIC_GROUPS),
+    help="Specific to 'ANALYTICS' reports. 
Specifies the list of metrics (as a group) that should be returned: "
+    "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation",
+)
+@click.option(
+    "--twitter-placement",
+    type=click.Choice(PLACEMENTS),
+    default="ALL_ON_TWITTER",
+    help="Specific to 'ANALYTICS' reports. Scopes the retrieved data to a particular placement.",
+)
+@click.option(
+    "--twitter-segmentation-type",
+    type=click.Choice(SEGMENTATION_TYPES),
+    help="Specific to 'ANALYTICS' reports. Specifies how the retrieved data should be segmented: "
+    "https://developer.twitter.com/en/docs/ads/analytics/overview/metrics-and-segmentation",
+)
+@click.option(
+    "--twitter-platform",
+    help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'DEVICES' or 'PLATFORM_VERSION'. "
+    "To get possible values: GET targeting_criteria/platforms",
+)
+@click.option(
+    "--twitter-country",
+    help="Specific to 'ANALYTICS' reports. Required if segmentation_type is set to 'CITIES', 'POSTAL_CODES', or 'REGION'. "
+    "To get possible values: GET targeting_criteria/locations",
+)
+@click.option(
+    "--twitter-start-date", type=click.DateTime(), help="Specifies report start date."
+)
+@click.option(
+    "--twitter-end-date",
+    type=click.DateTime(),
+    help="Specifies report end date (inclusive).",
+)
+@click.option(
+    "--twitter-add-request-date-to-report",
+    type=click.BOOL,
+    default=False,
+    help="If set to 'True', the date on which the request is made will appear on each report record.",
+)
+@processor(
+    "twitter_consumer_key",
+    "twitter_consumer_secret",
+    "twitter_access_token",
+    "twitter_access_token_secret",
+)
+def twitter(**kwargs):
+    return TwitterReader(**extract_args("twitter_", kwargs))
+
+
+class TwitterReader(Reader):
+    def __init__(
+        self,
+        consumer_key,
+        consumer_secret,
+        access_token,
+        access_token_secret,
+        account_id,
+        report_type,
+        entity,
+        entity_attribute,
+        granularity,
+        metric_group,
+        placement,
+        segmentation_type,
+        platform,
+        country,
+        start_date,
+        end_date,
+        add_request_date_to_report,
+    ):
+        # Authentication inputs
+        self.client = Client(
+            consumer_key, consumer_secret, access_token, access_token_secret
+        )
+        self.account = self.client.accounts(account_id)
+
+        # General inputs
+        self.report_type = report_type
+        self.entity = entity
+        self.start_date = start_date
+        # Report end date is exclusive: to become inclusive, it should be incremented by 1 day
+        self.end_date = end_date + timedelta(days=1)
+        self.add_request_date_to_report = add_request_date_to_report
+
+        # Report inputs: ENTITY
+        self.entity_attributes = list(entity_attribute)
+
+        # Report inputs: ANALYTICS
+        self.granularity = granularity
+        self.metric_groups = list(metric_group)
+        self.placement = placement
+        self.segmentation_type = segmentation_type
+        self.platform = platform
+        self.country = country
+
+        # Validate inputs
+        self.validate_inputs()
+
+    def validate_inputs(self):
+        """
+        Validate combination of input parameters (triggered in TwitterReader constructor).
+        """
+
+        self.validate_dates()
+        self.validate_analytics_segmentation()
+        self.validate_analytics_metric_groups()
+        self.validate_analytics_entity()
+        self.validate_reach_entity()
+        self.validate_entity_attributes()
+
+    def validate_dates(self):
+
+        if self.end_date - timedelta(days=1) < self.start_date:
+            raise ClickException(
+                "Report end date should be equal to or later than report start date."
+            )
+
+    def validate_analytics_segmentation(self):
+
+        if self.report_type == "ANALYTICS":
+            if (
+                self.segmentation_type in ["DEVICES", "PLATFORM_VERSION"]
+                and not self.platform
+            ):
+                raise ClickException("Please provide a value for 'platform'.")
+
+            elif (
+                self.segmentation_type in ["CITIES", "POSTAL_CODES", "REGION"]
+                and not self.country
+            ):
+                raise ClickException("Please provide a value for 'country'.")
+
+    def validate_analytics_metric_groups(self):
+
+        if self.report_type == "ANALYTICS":
+
+            if self.entity == "FUNDING_INSTRUMENT" and any(
+                [
+                    metric_group not in ["ENGAGEMENT", "BILLING"]
+                    for metric_group in self.metric_groups
+                ]
+            ):
+                raise ClickException(
+                    "'FUNDING_INSTRUMENT' only accepts the 'ENGAGEMENT' and 'BILLING' metric groups."
+                )
+
+            if (
+                "MOBILE_CONVERSION" in self.metric_groups
+                and len(self.metric_groups) > 1
+            ):
+                raise ClickException(
+                    "'MOBILE_CONVERSION' data should be requested separately."
+                )
+
+    def validate_analytics_entity(self):
+
+        if self.report_type == "ANALYTICS":
+
+            if self.entity == "CARD":
+                raise ClickException(
+                    f"'ANALYTICS' reports only accept the following entities: {list(ENTITY_OBJECTS.keys())}."
+                )
+
+    def validate_reach_entity(self):
+
+        if self.report_type == "REACH":
+
+            if self.entity not in ["CAMPAIGN", "FUNDING_INSTRUMENT"]:
+                raise ClickException(
+                    "'REACH' reports only accept the following entities: CAMPAIGN, FUNDING_INSTRUMENT."
+                )
+
+    def validate_entity_attributes(self):
+
+        if self.report_type == "ENTITY":
+
+            if not all(
+                [
+                    attr in ENTITY_ATTRIBUTES[self.entity]
+                    for attr in self.entity_attributes
+                ]
+            ):
+                raise ClickException(
+                    f"Available attributes for '{self.entity}' are: {ENTITY_ATTRIBUTES[self.entity]}"
+                )
+
+    def get_analytics_report(self, job_ids):
+        """
+        Get 'ANALYTICS' report through the 'Asynchronous Analytics' endpoint of Twitter Ads API.
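+        Takes as input the ids of previously queued async analytics jobs, waits
+        for each job to complete, then downloads and parses its raw response.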
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous
+        """
+
+        all_responses = []
+
+        for job_id in job_ids:
+
+            logging.info(f"Processing job_id: {job_id}")
+            job_result = self._wait_for_job_to_complete(job_id)
+            raw_analytics_response = self.get_raw_analytics_response(job_result)
+            all_responses.append(self.parse(raw_analytics_response))
+
+        return chain(*all_responses)
+
+    def get_active_entity_ids(self):
+        """
+        Step 1 of 'ANALYTICS' report generation process:
+        Returns a list containing the ids of active entities over the requested time period
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/active-entities
+        """
+
+        active_entities = ENTITY_OBJECTS[self.entity].active_entities(
+            self.account, self.start_date, self.end_date
+        )
+        return [obj["entity_id"] for obj in active_entities]
+
+    def get_job_ids(self, entity_ids):
+        """
+        Step 2 of 'ANALYTICS' report generation process:
+        Create asynchronous analytics jobs and return their ids for progress tracking
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous
+        """
+
+        return [
+            ENTITY_OBJECTS[self.entity]
+            .queue_async_stats_job(
+                self.account,
+                chunk_entity_ids,
+                self.metric_groups,
+                granularity=self.granularity,
+                placement=self.placement,
+                start_time=self.start_date,
+                end_time=self.end_date,
+                segmentation_type=self.segmentation_type,
+                platform=self.platform,
+                country=self.country,
+            )
+            .id
+            for chunk_entity_ids in split_list(entity_ids, MAX_ENTITY_IDS_PER_JOB)
+        ]
+
+    @retry(
+        wait=wait_exponential(multiplier=1, min=60, max=3600),
+        stop=stop_after_delay(36000),
+    )
+    def _wait_for_job_to_complete(self, job_id):
+        """
+        Retries getting the job result until the job is no longer in a 'PROCESSING' state.
+        """
+        job_result = self.get_job_result(job_id)
+        if job_result.status == "PROCESSING":
+            raise Exception(f"Job {job_id} is still running.")
+        else:
+            return job_result
+
+    def get_job_result(self, job_id):
+        """
+        Step 3 of 'ANALYTICS' report generation process:
+        Get job info to track its progress (job_result.status) and download report once completed (job_result.url)
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous
+        """
+
+        return (
+            ENTITY_OBJECTS[self.entity]
+            .async_stats_job_result(self.account, job_ids=[job_id])
+            .first
+        )
+
+    def get_raw_analytics_response(self, job_result):
+        """
+        Step 4 of 'ANALYTICS' report generation process:
+        Download raw response from job once completed
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous
+        """
+
+        return ENTITY_OBJECTS[self.entity].async_stats_job_data(
+            self.account, url=job_result.url
+        )
+
+    def parse(self, raw_analytics_response):
+        """
+        Parse a single raw response into a generator of JSON-like records.
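+        For instance, with a 'TOTAL' granularity and no segmentation, the raw response
+        {"time_series_length": 1,
+         "data": [{"id": "XXXXX",
+                   "id_data": [{"segment": None,
+                                "metrics": {"retweets": [11], "likes": [12]}}]}]}
+        is parsed into a single record: {"id": "XXXXX", "retweets": 11, "likes": 12}.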
+ """ + + for entity_resp in raw_analytics_response["data"]: + for entity_data in entity_resp["id_data"]: + entity_records = [ + { + "id": entity_resp["id"], + **{ + mt: 0 + if entity_data["metrics"][mt] is None + else entity_data["metrics"][mt][i] + for mt in entity_data["metrics"] + }, + } + for i in range(raw_analytics_response["time_series_length"]) + ] + entity_records = self.add_daily_timestamps(entity_records) + entity_records = self.add_segment(entity_records, entity_data) + yield from entity_records + + def add_daily_timestamps(self, entity_records): + """ + Add daily timestamps to a list of records, if granularity is 'DAY'. + """ + + if self.granularity == "DAY": + period_items = self.get_daily_period_items() + return [ + {**entity_records[i], "date": period_items[i].strftime(REP_DATEFORMAT)} + for i in range(len(entity_records)) + ] + return entity_records + + def get_daily_period_items(self): + """ + Returns a list of datetime instances representing each date contained + in the requested period. Useful when granularity is set to 'DAY'. + """ + + delta = self.end_date - self.start_date + return [self.start_date + timedelta(days=i) for i in range(delta.days)] + + def add_segment(self, entity_records, entity_data): + """ + Add segment to a list of records, if a segmentation_type is requested. + """ + + if self.segmentation_type: + entity_segment = entity_data["segment"]["segment_name"] + return [ + {**rec, self.segmentation_type.lower(): entity_segment} + for rec in entity_records + ] + return entity_records + + def get_campaign_management_report(self): + """ + Get 'ENTITY' report through 'Campaign Management' endpoints of Twitter Ads API. + Supported entities: FUNDING_INSTRUMENT, CAMPAIGN, LINE_ITEM, MEDIA_CREATIVE, PROMOTED_TWEET + Documentation: https://developer.twitter.com/en/docs/ads/campaign-management/api-reference + """ + + ACCOUNT_CHILD_OBJECTS = { + "FUNDING_INSTRUMENT": self.account.funding_instruments(), + "CAMPAIGN": self.account.campaigns(), + "LINE_ITEM": self.account.line_items(), + "MEDIA_CREATIVE": self.account.media_creatives(), + "PROMOTED_TWEET": self.account.promoted_tweets(), + } + + yield from [ + {attr: getattr(entity_obj, attr, None) for attr in self.entity_attributes} + for entity_obj in ACCOUNT_CHILD_OBJECTS[self.entity] + ] + + def get_cards_report(self): + """ + Get 'ENTITY' report through the 'Creatives' endpoint of Twitter Ads API. 
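+        Crosses the 'Tweets' and 'Cards Fetch' endpoints: for each published tweet
+        holding a card_uri, the requested card attributes are collected.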
+        Supported entities: CARD
+        Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/
+        """
+
+        for tweet in self.get_published_tweets():
+            if "card_uri" in tweet:
+                card_fetch = self.get_card_fetch(card_uri=tweet["card_uri"])
+                card_attributes = {
+                    attr: getattr(card_fetch, attr, None)
+                    for attr in self.entity_attributes
+                }
+                record = {
+                    "tweet_id": tweet["tweet_id"],
+                    "card_uri": tweet["card_uri"],
+                    **card_attributes,
+                }
+                yield record
+
+    def get_published_tweets(self):
+        """
+        Step 1 of 'ENTITY - CARD' report generation process:
+        Returns details on 'PUBLISHED' tweets, as a generator of dictionaries
+        Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/tweets
+        """
+
+        resource = f"/{API_VERSION}/accounts/{self.account.id}/tweets"
+        params = {"tweet_type": "PUBLISHED"}
+        request = Request(self.client, "get", resource, params=params)
+
+        yield from Cursor(None, request)
+
+    def get_card_fetch(self, card_uri):
+        """
+        Step 2 of 'ENTITY - CARD' report generation process:
+        Returns the CardsFetch object associated with a specific card_uri
+        Documentation: https://developer.twitter.com/en/docs/ads/creatives/api-reference/cards-fetch
+        """
+
+        return CardsFetch.load(self.account, card_uris=[card_uri]).first
+
+    def get_reach_report(self):
+        """
+        Get 'REACH' report through the 'Reach and Average Frequency' endpoint of Twitter Ads API.
+        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/reach
+        """
+
+        resource = f"/{API_VERSION}/stats/accounts/{self.account.id}/reach/{self.entity.lower()}s"
+        entity_ids = self.get_active_entity_ids()
+
+        for chunk_entity_ids in split_list(entity_ids, MAX_ENTITY_IDS_PER_JOB):
+            params = {
+                "account_id": self.account.id,
+                f"{self.entity.lower()}_ids": ",".join(chunk_entity_ids),
+                "start_time": self.start_date.strftime(API_DATEFORMAT),
+                "end_time": self.end_date.strftime(API_DATEFORMAT),
+            }
+            request = Request(self.client, "get", resource, params=params)
+            yield from Cursor(None, request)
+
+    def add_request_or_period_dates(self, record):
+        """
+        Add request_date, period_start_date and/or period_end_date to a JSON-like record.
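+        - request_date is added if add_request_date_to_report is True
+        - period_start_date and period_end_date are added on 'REACH' reports,
+        and on 'ANALYTICS' reports with a 'TOTAL' granularity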
+ """ + + def check_add_period_date_to_report(): + return ( + self.report_type == "ANALYTICS" and self.granularity == "TOTAL" + ) or self.report_type == "REACH" + + if self.add_request_date_to_report: + record["request_date"] = datetime.today().strftime(REP_DATEFORMAT) + + if check_add_period_date_to_report(): + record["period_start_date"] = self.start_date.strftime(REP_DATEFORMAT) + record["period_end_date"] = (self.end_date - timedelta(days=1)).strftime( + REP_DATEFORMAT + ) + + return record + + def read(self): + + if self.report_type == "ANALYTICS": + entity_ids = self.get_active_entity_ids() + + total_jobs = (len(entity_ids) // MAX_ENTITY_IDS_PER_JOB) + 1 + logging.info(f"Processing a total of {total_jobs} jobs") + + data = [] + for chunk_entity_ids in split_list( + entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS + ): + job_ids = self.get_job_ids(chunk_entity_ids) + data += self.get_analytics_report(job_ids) + + elif self.report_type == "REACH": + data = self.get_reach_report() + + elif self.report_type == "ENTITY": + if self.entity == "CARD": + data = self.get_cards_report() + else: + data = self.get_campaign_management_report() + + def result_generator(): + for record in data: + yield self.add_request_or_period_dates(record) + + yield JSONStream("results_" + self.account.id, result_generator()) diff --git a/nck/readers/yandex_statistics_reader.py b/nck/readers/yandex_statistics_reader.py index f5b9eebd..8332c52e 100644 --- a/nck/readers/yandex_statistics_reader.py +++ b/nck/readers/yandex_statistics_reader.py @@ -27,16 +27,20 @@ import nck.helpers.api_client_helper as api_client_helper from nck.clients.api_client import ApiClient from nck.commands.command import processor -from nck.helpers.yandex_helper import (DATE_RANGE_TYPES, LANGUAGES, OPERATORS, - REPORT_TYPES, STATS_FIELDS) +from nck.helpers.yandex_helper import ( + DATE_RANGE_TYPES, + LANGUAGES, + OPERATORS, + REPORT_TYPES, + STATS_FIELDS, +) from nck.readers.reader import Reader from nck.streams.json_stream import JSONStream from nck.utils.args import extract_args -from nck.utils.text import get_generator_dict_from_str_tsv +from nck.utils.text import get_report_generator_from_flat_file class StrList(click.ParamType): - def convert(self, value, param, ctx): return value.split(",") @@ -48,21 +52,16 @@ def convert(self, value, param, ctx): @click.command(name="read_yandex_statistics") @click.option("--yandex-token", required=True) -@click.option( - "--yandex-report-language", - type=click.Choice(LANGUAGES), - default="en" -) +@click.option("--yandex-report-language", type=click.Choice(LANGUAGES), default="en") @click.option( "--yandex-filter", "yandex_filters", multiple=True, - type=click.Tuple([click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE]) -) -@click.option( - "--yandex-max-rows", - type=int + type=click.Tuple( + [click.Choice(STATS_FIELDS), click.Choice(OPERATORS), STR_LIST_TYPE] + ), ) +@click.option("--yandex-max-rows", type=int) @click.option( "--yandex-field-name", "yandex_fields", @@ -73,36 +72,22 @@ def convert(self, value, param, ctx): "Fields to output in the report (columns)." 
"For the full list of fields and their meanings, " "see https://tech.yandex.com/direct/doc/reports/fields-list-docpage/" - ) + ), ) @click.option( "--yandex-report-name", - default=f"stats_report_{datetime.date.today()}_{random.randrange(10000)}" -) -@click.option( - "--yandex-report-type", - type=click.Choice(REPORT_TYPES), - required=True -) -@click.option( - "--yandex-date-range", - type=click.Choice(DATE_RANGE_TYPES), - required=True + default=f"stats_report_{datetime.date.today()}_{random.randrange(10000)}", ) +@click.option("--yandex-report-type", type=click.Choice(REPORT_TYPES), required=True) +@click.option("--yandex-date-range", type=click.Choice(DATE_RANGE_TYPES), required=True) @click.option( "--yandex-include-vat", type=click.BOOL, required=True, - help="Whether to include VAT in the monetary amounts in the report." -) -@click.option( - "--yandex-date-start", - type=click.DateTime() -) -@click.option( - "--yandex-date-stop", - type=click.DateTime() + help="Whether to include VAT in the monetary amounts in the report.", ) +@click.option("--yandex-date-start", type=click.DateTime()) +@click.option("--yandex-date-stop", type=click.DateTime()) @processor("yandex_token") def yandex_statistics(**kwargs): return YandexStatisticsReader(**extract_args("yandex_", kwargs)) @@ -112,7 +97,6 @@ def yandex_statistics(**kwargs): class YandexStatisticsReader(Reader): - def __init__( self, token, @@ -121,7 +105,7 @@ def __init__( report_name: str, date_range: str, include_vat: bool, - **kwargs + **kwargs, ): self.token = token self.fields = list(fields) @@ -137,22 +121,20 @@ def result_generator(self): headers = self._build_request_headers() while True: response = api_client.execute_request( - url="reports", - body=body, - headers=headers, - stream=True + url="reports", body=body, headers=headers, stream=True ) if response.status_code == HTTPStatus.CREATED: waiting_time = int(response.headers["retryIn"]) - logger.info(f"Report added to queue. Should be ready in {waiting_time} min.") + logger.info( + f"Report added to queue. Should be ready in {waiting_time} min." 
+ ) time.sleep(waiting_time * 60) elif response.status_code == HTTPStatus.ACCEPTED: logger.info("Report in queue.") elif response.status_code == HTTPStatus.OK: logger.info("Report successfully retrieved.") - return get_generator_dict_from_str_tsv( - response.iter_lines(), - skip_first_row=True + return get_report_generator_from_flat_file( + response.iter_lines(), delimiter="\t", skip_n_first=1, ) elif response.status_code == HTTPStatus.BAD_REQUEST: logger.error("Invalid request.") @@ -175,21 +157,25 @@ def _build_request_body(self) -> Dict: api_client_helper.get_dict_with_keys_converted_to_new_string_format( field=filter_element[0], operator=filter_element[1], - values=filter_element[2] + values=filter_element[2], ) for filter_element in self.kwargs["filters"] ] - body["params"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + body[ + "params" + ] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( selection_criteria=selection_criteria, field_names=self.fields, report_name=self.report_name, report_type=self.report_type, date_range_type=self.date_range, format="TSV", - include_v_a_t="YES" if self.include_vat else "NO" + include_v_a_t="YES" if self.include_vat else "NO", ) if self.kwargs["max_rows"] is not None: - body["params"]["Page"] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( + body["params"][ + "Page" + ] = api_client_helper.get_dict_with_keys_converted_to_new_string_format( limit=self.kwargs["max_rows"] ) return body @@ -197,7 +183,7 @@ def _build_request_body(self) -> Dict: def _build_request_headers(self) -> Dict: return { "skipReportSummary": "true", - "Accept-Language": self.kwargs["report_language"] + "Accept-Language": self.kwargs["report_language"], } def _add_custom_dates_if_set(self) -> Dict: @@ -207,21 +193,22 @@ def _add_custom_dates_if_set(self) -> Dict: and self.kwargs["date_stop"] is not None and self.date_range == "CUSTOM_DATE" ): - selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime("%Y-%m-%d") + selection_criteria["DateFrom"] = self.kwargs["date_start"].strftime( + "%Y-%m-%d" + ) selection_criteria["DateTo"] = self.kwargs["date_stop"].strftime("%Y-%m-%d") elif ( self.kwargs["date_start"] is not None and self.kwargs["date_stop"] is not None and self.date_range != "CUSTOM_DATE" ): - raise click.ClickException("Wrong date range. If start and stop dates are set, should be CUSTOM_DATE.") - elif ( - ( - self.kwargs["date_start"] is not None - or self.kwargs["date_stop"] is not None + raise click.ClickException( + "Wrong date range. If start and stop dates are set, should be CUSTOM_DATE." ) - and self.date_range != "CUSTOM_DATE" - ): + elif ( + self.kwargs["date_start"] is not None + or self.kwargs["date_stop"] is not None + ) and self.date_range != "CUSTOM_DATE": raise click.ClickException( ( "Wrong combination of date parameters. " @@ -229,17 +216,12 @@ def _add_custom_dates_if_set(self) -> Dict: ) ) elif ( - ( - self.kwargs["date_start"] is None - or self.kwargs["date_stop"] is None + self.kwargs["date_start"] is None or self.kwargs["date_stop"] is None + ) and self.date_range == "CUSTOM_DATE": + raise click.ClickException( + "Missing at least one date. Have you set start and stop dates?" ) - and self.date_range == "CUSTOM_DATE" - ): - raise click.ClickException("Missing at least one date. 
Have you set start and stop dates?")
         return selection_criteria
 
     def read(self):
-        yield JSONStream(
-            f"results_{self.report_type}",
-            self.result_generator()
-        )
+        yield JSONStream(f"results_{self.report_type}", self.result_generator())
diff --git a/nck/streams/normalized_json_stream.py b/nck/streams/normalized_json_stream.py
index 164b5e83..1bc34eb8 100644
--- a/nck/streams/normalized_json_stream.py
+++ b/nck/streams/normalized_json_stream.py
@@ -51,5 +51,6 @@ def _normalize_key(key):
             .replace("[", "_")
             .replace("]", "_")
             .replace(".", "_")
+            .replace("%", "per")
             .strip("_")
         )
diff --git a/nck/utils/exceptions.py b/nck/utils/exceptions.py
new file mode 100644
index 00000000..495764d2
--- /dev/null
+++ b/nck/utils/exceptions.py
@@ -0,0 +1,29 @@
+# GNU Lesser General Public License v3.0 only
+# Copyright (C) 2020 Artefact
+# licence-information@artefact.com
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 3 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+class RetryTimeoutError(Exception):
+    """Raised when a query exceeds its time limit threshold."""
+
+    pass
+
+
+class SdfOperationError(Exception):
+    """Raised when an SDF operation has failed."""
+
+    pass
diff --git a/nck/utils/file_reader.py b/nck/utils/file_reader.py
index ce41175a..149e9208 100644
--- a/nck/utils/file_reader.py
+++ b/nck/utils/file_reader.py
@@ -19,9 +19,23 @@
 import csv
 import codecs
 import gzip
+import zipfile
 import json
 
 
+def unzip(input_file, output_path):
+    with zipfile.ZipFile(input_file, 'r') as zip_ref:
+        zip_ref.extractall(output_path)
+
+
+def sdf_to_njson_generator(path_to_file):
+    csv_reader = CSVReader(csv_delimiter=",", csv_fieldnames=None)
+    with open(path_to_file, "rb") as fd:
+        dict_reader = csv_reader.read_csv(fd)
+        for line in dict_reader:
+            yield line
+
+
 def format_csv_delimiter(csv_delimiter):
     _csv_delimiter = csv_delimiter.encode().decode("unicode_escape")
     if csv_delimiter == "newline":
@@ -32,9 +46,7 @@ def format_csv_delimiter(csv_delimiter):
 
 
 def format_csv_fieldnames(csv_fieldnames):
-    if csv_fieldnames is None:
-        _csv_fieldnames = csv_fieldnames
-    elif isinstance(csv_fieldnames, list):
+    if isinstance(csv_fieldnames, list):
         _csv_fieldnames = csv_fieldnames
     elif isinstance(csv_fieldnames, (str, bytes)):
         _csv_fieldnames = json.loads(csv_fieldnames)
@@ -49,8 +61,7 @@ def format_csv_fieldnames(csv_fieldnames):
 class CSVReader(object):
     def __init__(self, csv_delimiter, csv_fieldnames, **kwargs):
         self.csv_delimiter = format_csv_delimiter(csv_delimiter)
-        self.csv_fieldnames = format_csv_fieldnames(csv_fieldnames)
-
+        self.csv_fieldnames = format_csv_fieldnames(csv_fieldnames) if csv_fieldnames is not None else None
         self.csv_reader = lambda fd: self.read_csv(fd, **kwargs)
 
     def read_csv(self, fd, **kwargs):
diff --git a/nck/utils/text.py b/nck/utils/text.py
index 2813966d..50621451 100644
--- a/nck/utils/text.py
+++ b/nck/utils/text.py
@@ -16,105 +16,117 @@
 # along 
with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 import logging
-from typing import Dict, Generator, List, Union
 import re
 import csv
 from io import StringIO
+from collections import deque
+from itertools import islice
+
+
+def get_report_generator_from_flat_file(
+    line_iterator,
+    delimiter=",",
+    skip_n_first=0,
+    skip_n_last=0,
+    add_column=False,
+    column_dict={},
+):
+    """
+    From the line iterator of a flat file:
+    [
+        "Date,AdvertiserId,Impressions",
+        "2020-01-01,1234,10",
+        "2020-01-01,5678,20"
+    ]
+    Return a generator of {column: value} dictionaries:
+    [
+        {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+        {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"}
+    ]
+    Params
+    :line_iterator (iter): line iterator of the file to process
+    :delimiter (str): delimiter to parse file lines
+    :skip_n_first (int): number of lines to skip at the beginning of the file (excl. blank lines)
+    :skip_n_last (int): number of lines to skip at the end of the file (excl. blank lines)
+    :add_column (bool): whether to add a fixed {column: value} at the end of each record
+    :column_dict (dict): if add_column is True, {column: value} dictionary
+    to add at the end of each record (can include multiple column_names)
+    """
 
-from nck.utils.date_handler import get_date_start_and_date_stop_from_range
-
-
-def add_column_value_to_csv_line_iterator(line_iterator, columname, value):
     first_line = True
-    for line in line_iterator:
-        if line == "":
-            break
+    for line in skip(line_iterator, skip_n_first, skip_n_last):
+        line = decode_if_needed(line)
         if first_line:
             first_line = False
-            if columname in line.split(","):
-                raise Exception("Column {} already present".format(columname))
-            yield line + "," + columname
+            headers = parse_decoded_line(line, delimiter)
         else:
-            yield line + "," + value
-
-
-def get_generator_dict_from_str_csv(
-    line_iterator: Generator[Union[bytes, str], None, None],
-    add_date=False,
-    day_range=None,
-    date_format="%Y-%m-%d",
-    skip_last_row=True,
-) -> Generator[Dict[str, str], None, None]:
-    first_line = next(line_iterator)
-    headers = (
-        parse_decoded_line(first_line.decode("utf-8"))
-        if isinstance(first_line, bytes)
-        else parse_decoded_line(first_line)
-    )
-    if add_date:
-        headers.extend(["date_start", "date_stop"])
-
-    next_line = next(line_iterator, None)
-    while next_line is not None:
-        current_line = next_line
-        if isinstance(current_line, bytes):
-            try:
-                current_line = current_line.decode("utf-8")
-            except UnicodeDecodeError as err:
+            parsed_line = parse_decoded_line(line, delimiter)
+            if len(parsed_line) != len(headers):
                 logging.warning(
-                    "An error has occurred while parsing the file. "
-                    "The line could not be decoded in %s."
-                    "Invalid input that the codec failed on: %s",
-                    err.encoding,
-                    err.object[err.start : err.end],
+                    f"Skipping line '{line}': length of parsed line doesn't match length of headers."
                 )
-                current_line = current_line.decode("utf-8", errors="ignore")
+            else:
+                record = dict(zip(headers, parsed_line))
+                if add_column:
+                    yield {**record, **column_dict}
+                else:
+                    yield record
+
+
+def decode_if_needed(line):
+    if isinstance(line, bytes):
+        try:
+            line = line.decode("utf-8")
+        except UnicodeDecodeError as e:
+            logging.warning(
+                "An error has occurred while parsing the file. "
+                f"The line could not be decoded in {e.encoding}. "
+ f"Invalid input that the codec failed on: {e.object[e.start : e.end]}" + ) + line = line.decode("utf-8", errors="ignore") + return line + + +def parse_decoded_line(line, delimiter=",", quotechar='"'): + line_as_file = StringIO(line) + reader = csv.reader( + line_as_file, + delimiter=delimiter, + quotechar=quotechar, + quoting=csv.QUOTE_ALL, + skipinitialspace=True, + ) + return next(reader) - next_line = next(line_iterator, "") - if len(current_line) == 0 or (skip_last_row and len(next_line) == 0): - break - if add_date: - start, end = get_date_start_and_date_stop_from_range(day_range) - current_line += f",{start.strftime(date_format)},{end.strftime(date_format)}" +def skip(iterator, n_first, n_last): + """ + Skips the n first and/or n last lines of a line iterator, + from which blank lines have been removed + """ + iterator = skip_blank(iterator) + if n_first > 0: + iterator = skip_first(iterator, n_first) + if n_last > 0: + iterator = skip_last(iterator, n_last) + yield from iterator - yield dict(zip(headers, parse_decoded_line(current_line))) +def skip_blank(iterator): + for item in iterator: + if item: + yield item -def get_generator_dict_from_str_tsv( - line_iterator: Generator[Union[bytes, str], None, None], skip_first_row=False -) -> Generator[Dict[str, str], None, None]: - if skip_first_row: - next(line_iterator) - headers_line = next(line_iterator) - headers = ( - parse_decoded_line(headers_line.decode("utf-8"), delimiter="\t") - if isinstance(headers_line, bytes) - else parse_decoded_line(headers_line, delimiter="\t") - ) - for line in line_iterator: - if isinstance(line, bytes): - try: - line = line.decode("utf-8") - except UnicodeDecodeError as err: - logging.warning( - "An error has occured while parsing the file. " - "The line could not be decoded in %s." - "Invalid input that the codec failed on: %s", - err.encoding, - err.object[err.start : err.end], - ) - line = line.decode("utf-8", errors="ignore") - yield dict(zip(headers, parse_decoded_line(line, delimiter="\t"))) +def skip_first(iterator, n): + yield from islice(iterator, n, None) -def parse_decoded_line(line: str, delimiter=",", quotechar='"') -> List[str]: - line_as_file = StringIO(line) - reader = csv.reader( - line_as_file, delimiter=delimiter, quotechar=quotechar, quoting=csv.QUOTE_ALL, skipinitialspace=True - ) - return next(reader) +def skip_last(iterator, n): + previous_items = deque(islice(iterator, n), n) + for item in iterator: + yield previous_items.popleft() + previous_items.append(item) def reformat_naming_for_bq(text, char="_"): diff --git a/nck/writers/local_writer.py b/nck/writers/local_writer.py index 14959fab..95f64dbb 100644 --- a/nck/writers/local_writer.py +++ b/nck/writers/local_writer.py @@ -36,7 +36,7 @@ def __init__(self, local_directory): def write(self, stream): """ - Write file to console, mainly used for debugging + Write file to disk at location given as parameter. 
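+        The file is written at <local_directory>/<stream name>.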
""" path = os.path.join(self._local_directory, stream.name) diff --git a/nck/writers/s3_writer.py b/nck/writers/s3_writer.py index 74e6017f..49fad683 100644 --- a/nck/writers/s3_writer.py +++ b/nck/writers/s3_writer.py @@ -55,7 +55,7 @@ def __init__( @retry def write(self, stream): - logging.info("Writing file to S3") + logging.info("Start writing file to S3 ...") bucket = self._s3_resource.Bucket(self._bucket_name) if bucket not in self._s3_resource.buckets.all(): @@ -86,5 +86,5 @@ def write(self, stream): Params={"Bucket": self._bucket_name, "Key": stream.name}, ExpiresIn=3600, ) - + logging.info(f"file written at location {url_file}") return url_file, bucket diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e34796ec --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 120 \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index 186f7889..c8adfde3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,4 +4,5 @@ pytest flake8 nose parameterized==0.7.1 -freezegun==0.3.15 \ No newline at end of file +freezegun==0.3.15 +pre-commit==2.7.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e0bacf59..643c47c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ curlify==2.2.1 cx-Oracle==7.3.0 docopt==0.6.2 docutils==0.15.2 -facebook-business==5.0.2 +facebook-business==8.0.4 google-api-core==1.14.3 google-api-python-client==1.4.2 google-auth==1.7.2 @@ -61,5 +61,6 @@ uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 googleads==22.0.0 +twitter-ads==7.0.1 pyjwt==1.7.1 -cryptography==2.9 \ No newline at end of file +cryptography==2.9 diff --git a/setup.py b/setup.py index 07d6d7f9..95134467 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,6 @@ from setuptools import setup, find_packages -with open('README.md') as readme_file: - readme = readme_file.read() with open('requirements.txt') as requirements_file: requirements = [el.strip() for el in requirements_file.readlines()] @@ -47,9 +45,7 @@ ], }, install_requires=requirements, - # long_description=readme + '\n\n' + history, include_package_data=True, - # keywords='nautilus_connectors', name='nck', packages=find_packages(), setup_requires=setup_requirements, diff --git a/tests/readers/test_dbm_reader.py b/tests/readers/test_dbm_reader.py index 279d93c8..eb676bc9 100644 --- a/tests/readers/test_dbm_reader.py +++ b/tests/readers/test_dbm_reader.py @@ -23,78 +23,51 @@ class TestDbmReader(unittest.TestCase): - def mock_dbm_reader(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) - @mock.patch.object(DbmReader, '__init__', mock_dbm_reader) + @mock.patch.object(DbmReader, "__init__", mock_dbm_reader) def test_get_query_body(self): kwargs = {} reader = DbmReader(**kwargs) - reader.kwargs = { - 'filter': [('FILTER_ADVERTISER', 1)] - } + reader.kwargs = {"filter": [("FILTER_ADVERTISER", 1)]} expected_query_body = { - 'metadata': { - 'format': 'CSV', - 'title': 'NO_TITLE_GIVEN', - 'dataRange': 'LAST_7_DAYS' + "kind": "doubleclickbidmanager#query", + "metadata": {"format": "CSV", "title": "NO_TITLE_GIVEN", "dataRange": "LAST_7_DAYS"}, + "params": { + "type": "TYPE_TRUEVIEW", + "groupBys": [], + "metrics": [], + "filters": [{"type": "FILTER_ADVERTISER", "value": "1"}], }, - 'params': { - 'type': 'TYPE_TRUEVIEW', - 'groupBys': None, - 'metrics': None, - 'filters': [ - { - 'type': 'FILTER_ADVERTISER', - 'value': '1' - } - ] - }, - 'schedule': { - 'frequency': 'ONE_TIME' - } + 
"schedule": {"frequency": "ONE_TIME"}, } self.assertDictEqual(reader.get_query_body(), expected_query_body) - @mock.patch.object(DbmReader, '__init__', mock_dbm_reader) + @mock.patch.object(DbmReader, "__init__", mock_dbm_reader) def test_get_query_body_ms_conversion(self): kwargs = {} reader = DbmReader(**kwargs) reader.kwargs = { - 'filter': [('FILTER_ADVERTISER', 1)], - 'start_date': datetime.datetime( - 2020, 1, 15, tzinfo=datetime.timezone.utc - ), - 'end_date': datetime.datetime( - 2020, 1, 18, tzinfo=datetime.timezone.utc - ) + "filter": [("FILTER_ADVERTISER", 1)], + "start_date": datetime.datetime(2020, 1, 15, tzinfo=datetime.timezone.utc), + "end_date": datetime.datetime(2020, 1, 18, tzinfo=datetime.timezone.utc), } expected_query_body = { - 'metadata': { - 'format': 'CSV', - 'title': 'NO_TITLE_GIVEN', - 'dataRange': 'CUSTOM_DATES' - }, - 'params': { - 'type': 'TYPE_TRUEVIEW', - 'groupBys': None, - 'metrics': None, - 'filters': [ - { - 'type': 'FILTER_ADVERTISER', - 'value': '1' - } - ] - }, - 'schedule': { - 'frequency': 'ONE_TIME' + "kind": "doubleclickbidmanager#query", + "metadata": {"format": "CSV", "title": "NO_TITLE_GIVEN", "dataRange": "CUSTOM_DATES"}, + "params": { + "type": "TYPE_TRUEVIEW", + "groupBys": [], + "metrics": [], + "filters": [{"type": "FILTER_ADVERTISER", "value": "1"}], }, - 'reportDataStartTimeMs': 1579132800000, - 'reportDataEndTimeMs': 1579392000000 + "schedule": {"frequency": "ONE_TIME"}, + "reportDataStartTimeMs": 1579132800000, + "reportDataEndTimeMs": 1579392000000, } self.assertDictEqual(reader.get_query_body(), expected_query_body) diff --git a/tests/readers/test_dv360_reader.py b/tests/readers/test_dv360_reader.py new file mode 100644 index 00000000..dc738e4b --- /dev/null +++ b/tests/readers/test_dv360_reader.py @@ -0,0 +1,30 @@ +from nck.readers.dv360_reader import DV360Reader +from unittest import TestCase, mock + + +class TestDV360Reader(TestCase): + + def mock_dv360_reader(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + @mock.patch.object(DV360Reader, '__init__', mock_dv360_reader) + def test_get_sdf_body(self): + kwargs = {} + reader = DV360Reader(**kwargs) + reader.kwargs = { + "file_type": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], + "filter_type": "FILTER_TYPE_ADVERTISER_ID", + "advertiser_id": "4242424" + } + + expected_query_body = { + "parentEntityFilter": { + "fileType": ["FILE_TYPE_INSERTION_ORDER", "FILE_TYPE_CAMPAIGN"], + "filterType": "FILTER_TYPE_ADVERTISER_ID" + }, + "version": "SDF_VERSION_5_2", + "advertiserId": "4242424" + } + + self.assertDictEqual(reader.get_sdf_body(), expected_query_body) diff --git a/tests/readers/test_ttd.py b/tests/readers/test_ttd.py new file mode 100644 index 00000000..93892ac3 --- /dev/null +++ b/tests/readers/test_ttd.py @@ -0,0 +1,243 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from unittest import TestCase, mock +from nck.readers.ttd_reader import TheTradeDeskReader + +from datetime import datetime +from click import ClickException + + +class TheTradeDeskReaderTest(TestCase): + + kwargs = { + "login": "ttd_api_abcde@client.com", + "password": "XXXXX", + "advertiser_id": ["advertiser_1", "advertiser_2"], + "report_template_name": "adgroup_performance_template", + "report_schedule_name": "adgroup_performance_schedule", + "start_date": datetime(2020, 1, 1), + "end_date": datetime(2020, 3, 1), + "normalize_stream": False + } + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + def test_validate_dates(self, mock_build_headers): + temp_kwargs = self.kwargs.copy() + params = {"start_date": datetime(2020, 1, 3), "end_date": datetime(2020, 1, 1)} + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TheTradeDeskReader(**temp_kwargs) + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + return_value={ + "Result": [ + { + "ReportTemplateId": 1234, + "Name": "adgroup_performance_template", + "Type": "Custom", + "Format": "Text", + "CreatedByUserName": "Artefact", + } + ], + "ResultCount": 1, + }, + ) + def test_get_report_template_id_if_exactly_1_match( + self, mock_build_headers, mock_api_call + ): + reader = TheTradeDeskReader(**self.kwargs) + reader._get_report_template_id() + self.assertEqual(reader.report_template_id, 1234) + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + return_value={ + "Result": [ + { + "ReportTemplateId": 1234, + "Name": "adgroup_performance_template", + "Type": "Custom", + "Format": "Text", + "CreatedByUserName": "Artefact", + }, + { + "ReportTemplateId": 4321, + "Name": "adgroup_media_costs_template", + "Type": "Custom", + "Format": "Text", + "CreatedByUserName": "Artefact", + }, + ], + "ResultCount": 2, + }, + ) + def test_get_report_template_id_if_more_than_1_match( + self, mock_build_headers, mock_api_call + ): + with self.assertRaises(Exception): + TheTradeDeskReader(**self.kwargs)._get_report_template_id() + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + return_value={"Result": [], "ResultCount": 0}, + ) + def test_get_report_template_id_if_no_match( + self, mock_build_headers, mock_api_call + ): + with self.assertRaises(Exception): + TheTradeDeskReader(**self.kwargs)._get_report_template_id() + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + return_value={ + "ReportScheduleId": 5678, + "ReportScheduleName": "adgroup_performance_schedule", + }, + ) + def test_create_report_schedule(self, mock_build_headers, mock_api_call): + reader = TheTradeDeskReader(**self.kwargs) + reader.report_template_id = 1234 + reader._create_report_schedule() + self.assertEqual(reader.report_schedule_id, 5678) + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + 
@mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._make_api_call", + side_effect=[ + { + "Result": [ + { + "ReportExecutionId": 8765, + "ReportExecutionState": "Pending", + "ReportScheduleId": 5678, + "ReportScheduleName": "adgroup_performance_schedule", + "ReportDeliveries": [{"DownloadURL": None}], + } + ], + "ResultCount": 1, + }, + { + "Result": [ + { + "ReportExecutionId": 8765, + "ReportExecutionState": "Completed", + "ReportScheduleId": 5678, + "ReportScheduleName": "adgroup_performance_schedule", + "ReportDeliveries": [{"DownloadURL": "https://download.url"}], + } + ], + "ResultCount": 1, + }, + ], + ) + def test_wait_for_download_url(self, mock_build_headers, mock_retry, mock_api_call): + reader = TheTradeDeskReader(**self.kwargs) + reader.report_schedule_id = 5678 + reader._wait_for_download_url() + self.assertEqual(reader.download_url, "https://download.url") + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) + @mock.patch.object(TheTradeDeskReader, "_get_report_template_id", lambda *args: None) + @mock.patch.object(TheTradeDeskReader, "_create_report_schedule", lambda *args: None) + @mock.patch.object(TheTradeDeskReader, "_wait_for_download_url", lambda *args: None) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._download_report", + return_value=iter( + [ + { + "Date": "2020-01-01T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 10 + }, + { + "Date": "2020-02-01T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 11 + }, + { + "Date": "2020-02-03T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 12 + }, + ] + ), + ) + def test_read_if_normalize_stream_is_False(self, mock_build_headers, mock_retry, mock_download_report): + reader = TheTradeDeskReader(**self.kwargs) + reader.report_template_id = 1234 + reader.report_schedule_id = 5678 + reader.download_url = "https://download.url" + output = next(reader.read()) + expected = [ + {"Date": "2020-01-01", "Advertiser ID": "XXXXX", "Impressions": 10}, + {"Date": "2020-02-01", "Advertiser ID": "XXXXX", "Impressions": 11}, + {"Date": "2020-02-03", "Advertiser ID": "XXXXX", "Impressions": 12}, + ] + for output_record, expected_record in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_record) + + @mock.patch("nck.readers.ttd_reader.TheTradeDeskReader._build_headers", return_value={}) + @mock.patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0) + @mock.patch.object(TheTradeDeskReader, "_get_report_template_id", lambda *args: None) + @mock.patch.object(TheTradeDeskReader, "_create_report_schedule", lambda *args: None) + @mock.patch.object(TheTradeDeskReader, "_wait_for_download_url", lambda *args: None) + @mock.patch( + "nck.readers.ttd_reader.TheTradeDeskReader._download_report", + return_value=iter( + [ + { + "Date": "2020-01-01T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 10, + }, + { + "Date": "2020-02-01T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 11, + }, + { + "Date": "2020-02-03T00:00:00", + "Advertiser ID": "XXXXX", + "Impressions": 12, + }, + ] + ), + ) + def test_read_if_normalize_stream_is_True(self, mock_build_headers, mock_retry, mock_download_report): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"normalize_stream": True}) + reader = TheTradeDeskReader(**temp_kwargs) + reader.report_template_id 
= 1234 + reader.report_schedule_id = 5678 + reader.download_url = "https://download.url" + output = next(reader.read()) + expected = [ + {"Date": "2020-01-01", "Advertiser_ID": "XXXXX", "Impressions": 10}, + {"Date": "2020-02-01", "Advertiser_ID": "XXXXX", "Impressions": 11}, + {"Date": "2020-02-03", "Advertiser_ID": "XXXXX", "Impressions": 12}, + ] + for output_record, expected_record in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_record) diff --git a/tests/readers/test_twitter_reader.py b/tests/readers/test_twitter_reader.py new file mode 100644 index 00000000..d17def7a --- /dev/null +++ b/tests/readers/test_twitter_reader.py @@ -0,0 +1,342 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from unittest import TestCase, mock +from click import ClickException +from freezegun import freeze_time +from datetime import datetime + +from twitter_ads.client import Client + +from nck.readers.twitter_reader import TwitterReader + + +class TwitterReaderTest(TestCase): + + kwargs = { + "consumer_key": "", + "consumer_secret": "", + "access_token": "", + "access_token_secret": "", + "account_id": "", + "report_type": None, + "entity": None, + "entity_attribute": [], + "granularity": None, + "metric_group": [], + "placement": None, + "segmentation_type": None, + "platform": None, + "country": None, + "add_request_date_to_report": None, + "start_date": datetime(2020, 1, 1), + "end_date": datetime(2020, 1, 3), + } + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_dates(self): + temp_kwargs = self.kwargs.copy() + params = {"start_date": datetime(2020, 1, 3), "end_date": datetime(2020, 1, 1)} + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_analytics_segmentation_if_missing_platform(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "segmentation_type": "DEVICES", + "platform": None, + } + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_analytics_segmentation_if_missing_country(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "segmentation_type": "CITIES", + "country": None, + } + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", 
lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_analytics_metric_groups_if_funding_instrument(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "entity": "FUNDING_INSTRUMENT", + "metric_group": ["ENGAGEMENT", "VIDEO"], + } + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_analytics_metric_groups_if_mobile_conversion(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "metric_group": ["MOBILE_CONVERSION", "ENGAGEMENT"], + } + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_analytics_entity(self): + temp_kwargs = self.kwargs.copy() + params = {"report_type": "ANALYTICS", "entity": "CARD"} + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_reach_entity(self): + temp_kwargs = self.kwargs.copy() + params = {"report_type": "REACH", "entity": "LINE_ITEM"} + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_validate_entity_attributes(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ENTITY", + "entity": "CAMPAIGN", + "entity_attribute": ["id", "name", "XXXXX"], + } + temp_kwargs.update(params) + with self.assertRaises(ClickException): + TwitterReader(**temp_kwargs) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_get_daily_period_items(self): + temp_kwargs = self.kwargs.copy() + params = {"start_date": datetime(2020, 1, 1), "end_date": datetime(2020, 1, 3)} + temp_kwargs.update(params) + output = TwitterReader(**temp_kwargs).get_daily_period_items() + expected = [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)] + self.assertEqual(output, expected) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_parse_with_total_granularity(self): + temp_kwargs = self.kwargs.copy() + params = {"granularity": "TOTAL", "segmentation_type": None} + temp_kwargs.update(params) + raw_analytics_response = { + "time_series_length": 1, + "data": [ + { + "id": "XXXXX", + "id_data": [ + {"segment": None, "metrics": {"retweets": [11], "likes": [12]}} + ], + }, + { + "id": "YYYYY", + "id_data": [ + {"segment": None, "metrics": {"retweets": [21], "likes": [22]}} + ], + }, + ], + } + output = TwitterReader(**temp_kwargs).parse(raw_analytics_response) + expected = [ + {"id": "XXXXX", "retweets": 11, "likes": 12}, + {"id": "YYYYY", "retweets": 21, "likes": 22}, + ] + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_parse_with_day_granularity(self): 
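+        # With a 'DAY' granularity, the i-th value of each metric array
+        # should be mapped to the i-th date of the requested period.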
+ temp_kwargs = self.kwargs.copy() + params = { + "granularity": "DAY", + "segmentation_type": None, + "start_date": datetime(2020, 1, 1), + "end_date": datetime(2020, 1, 3), + } + temp_kwargs.update(params) + raw_analytics_response = { + "time_series_length": 3, + "data": [ + { + "id": "XXXXX", + "id_data": [ + { + "segment": None, + "metrics": { + "retweets": [11, 12, 13], + "likes": [14, 15, 16], + }, + } + ], + }, + { + "id": "YYYYY", + "id_data": [ + { + "segment": None, + "metrics": { + "retweets": [21, 22, 23], + "likes": [24, 25, 26], + }, + } + ], + }, + ], + } + output = TwitterReader(**temp_kwargs).parse(raw_analytics_response) + expected = [ + {"date": "2020-01-01", "id": "XXXXX", "retweets": 11, "likes": 14}, + {"date": "2020-01-02", "id": "XXXXX", "retweets": 12, "likes": 15}, + {"date": "2020-01-03", "id": "XXXXX", "retweets": 13, "likes": 16}, + {"date": "2020-01-01", "id": "YYYYY", "retweets": 21, "likes": 24}, + {"date": "2020-01-02", "id": "YYYYY", "retweets": 22, "likes": 25}, + {"date": "2020-01-03", "id": "YYYYY", "retweets": 23, "likes": 26}, + ] + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_parse_with_segment(self): + temp_kwargs = self.kwargs.copy() + params = {"granularity": "TOTAL", "segmentation_type": "GENDER"} + temp_kwargs.update(params) + raw_analytics_response = { + "time_series_length": 1, + "data": [ + { + "id": "XXXXX", + "id_data": [ + { + "segment": {"segment_name": "Male"}, + "metrics": {"retweets": [11], "likes": [12]}, + }, + { + "segment": {"segment_name": "Female"}, + "metrics": {"retweets": [13], "likes": [14]}, + }, + ], + }, + { + "id": "YYYYY", + "id_data": [ + { + "segment": {"segment_name": "Male"}, + "metrics": {"retweets": [21], "likes": [22]}, + }, + { + "segment": {"segment_name": "Female"}, + "metrics": {"retweets": [23], "likes": [24]}, + }, + ], + }, + ], + } + output = TwitterReader(**temp_kwargs).parse(raw_analytics_response) + expected = [ + {"id": "XXXXX", "gender": "Male", "retweets": 11, "likes": 12}, + {"id": "XXXXX", "gender": "Female", "retweets": 13, "likes": 14}, + {"id": "YYYYY", "gender": "Male", "retweets": 21, "likes": 22}, + {"id": "YYYYY", "gender": "Female", "retweets": 23, "likes": 24}, + ] + for output_record, expected_record in zip(output, expected): + self.assertDictEqual(output_record, expected_record) + + @freeze_time("2020-01-03") + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + def test_add_request_or_period_dates(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "granularity": "TOTAL", + "start_date": datetime(2020, 1, 1), + "end_date": datetime(2020, 1, 3), + "add_request_date_to_report": True, + } + temp_kwargs.update(params) + record = {"id": "XXXXX", "name": "Artefact Campaign"} + output = TwitterReader(**temp_kwargs).add_request_or_period_dates(record) + expected = { + "id": "XXXXX", + "name": "Artefact Campaign", + "period_start_date": "2020-01-01", + "period_end_date": "2020-01-03", + "request_date": "2020-01-03", + } + self.assertEqual(output, expected) + + def mock_get_job_result(*args): + job_result = mock.MagicMock() + job_result.status = "SUCCESS" + return job_result + + def mock_parse(*args): + yield from [ + {"id": "XXXXX", "retweets": 11, "likes": 12}, + {"id": "YYYYY", "retweets": 
21, "likes": 22}, + ] + + @mock.patch.object(Client, "__init__", lambda *args: None) + @mock.patch.object(Client, "accounts", lambda *args: None) + @mock.patch.object( + TwitterReader, "get_active_entity_ids", lambda *args: ["XXXXX", "YYYYYY"] + ) + @mock.patch.object(TwitterReader, "get_job_ids", lambda *args: ["123456789"]) + @mock.patch.object(TwitterReader, "get_job_result", mock_get_job_result) + @mock.patch.object(TwitterReader, "get_raw_analytics_response", lambda *args: {}) + @mock.patch.object(TwitterReader, "parse", mock_parse) + def test_read_analytics_report(self): + temp_kwargs = self.kwargs.copy() + params = { + "report_type": "ANALYTICS", + "granularity": "DAY", + "add_request_date_to_report": False, + } + temp_kwargs.update(params) + reader = TwitterReader(**temp_kwargs) + reader.account = mock.MagicMock() + output = next(reader.read()) + expected = [ + {"id": "XXXXX", "retweets": 11, "likes": 12}, + {"id": "YYYYY", "retweets": 21, "likes": 22}, + ] + for output_record, expected_record in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_record) diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py index a12a167d..6268716c 100644 --- a/tests/utils/test_text_utils.py +++ b/tests/utils/test_text_utils.py @@ -17,292 +17,209 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging from unittest import TestCase -from datetime import date -from unittest.mock import patch -from parameterized import parameterized - -from nck.utils.text import get_generator_dict_from_str_csv, get_generator_dict_from_str_tsv, parse_decoded_line +from nck.utils.text import parse_decoded_line, get_report_generator_from_flat_file class TestTextUtilsMethod(TestCase): - def test_multiple_encodings(self): - test_string_to_encode = ( - "BR,test_partner,123,Active,test_advertiser,123," - "0,,test_io,123,Active,,test_line_item" - ',123,0,,"",0.00,41' - ) + def test_get_report_generator__multiple_encodings(self): + test_string_to_encode = "2020-01-01,France,1234,10" lines = [ - ( - b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - b" ID,Advertiser Status,Advertiser Integration Code,Insertion" - b" Order,Insertion Order ID,Insertion Order Status,Insertion" - b" Order Integration Code,Line Item,Line Item ID,Line Item" - b" Status,Line Item Integration Code,Targeted Data Providers," - b"Cookie Reach: Average Impression Frequency,Cookie Reach: " - b"Impression Reach" - ), + b"Date,Country,AdvertiserId,Impressions", test_string_to_encode.encode("utf-8"), test_string_to_encode.encode("ascii"), test_string_to_encode.encode("windows-1252"), test_string_to_encode.encode("latin_1"), ] - line_iterator_multiple_encodings = (line for line in lines) - expected_dict = { - "Country": "BR", - "Partner": "test_partner", - "Partner ID": "123", - "Partner Status": "Active", - "Advertiser": "test_advertiser", - "Advertiser ID": "123", - "Advertiser Status": "0", - "Advertiser Integration Code": "", - "Insertion Order": "test_io", - "Insertion Order ID": "123", - "Insertion Order Status": "Active", - "Insertion Order Integration Code": "", - "Line Item": "test_line_item", - "Line Item ID": "123", - "Line Item Status": "0", - "Line Item Integration Code": "", - "Targeted Data Providers": "", - "Cookie Reach: Average Impression Frequency": "0.00", - "Cookie Reach: Impression Reach": "41", - } - for yielded_dict in get_generator_dict_from_str_csv(line_iterator_multiple_encodings): - self.assertDictEqual(yielded_dict, 
+
+    @mock.patch.object(Client, "__init__", lambda *args: None)
+    @mock.patch.object(Client, "accounts", lambda *args: None)
+    @mock.patch.object(
+        TwitterReader, "get_active_entity_ids", lambda *args: ["XXXXX", "YYYYY"]
+    )
+    @mock.patch.object(TwitterReader, "get_job_ids", lambda *args: ["123456789"])
+    @mock.patch.object(TwitterReader, "get_job_result", mock_get_job_result)
+    @mock.patch.object(TwitterReader, "get_raw_analytics_response", lambda *args: {})
+    @mock.patch.object(TwitterReader, "parse", mock_parse)
+    def test_read_analytics_report(self):
+        temp_kwargs = self.kwargs.copy()
+        params = {
+            "report_type": "ANALYTICS",
+            "granularity": "DAY",
+            "add_request_date_to_report": False,
+        }
+        temp_kwargs.update(params)
+        reader = TwitterReader(**temp_kwargs)
+        reader.account = mock.MagicMock()
+        output = next(reader.read())
+        expected = [
+            {"id": "XXXXX", "retweets": 11, "likes": 12},
+            {"id": "YYYYY", "retweets": 21, "likes": 22},
+        ]
+        for output_record, expected_record in zip(output.readlines(), expected):
+            self.assertEqual(output_record, expected_record)
diff --git a/tests/utils/test_text_utils.py b/tests/utils/test_text_utils.py
index a12a167d..6268716c 100644
--- a/tests/utils/test_text_utils.py
+++ b/tests/utils/test_text_utils.py
@@ -17,292 +17,209 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 import logging
 from unittest import TestCase
-from datetime import date
-from unittest.mock import patch
-from parameterized import parameterized
-
-from nck.utils.text import get_generator_dict_from_str_csv, get_generator_dict_from_str_tsv, parse_decoded_line
+from nck.utils.text import parse_decoded_line, get_report_generator_from_flat_file


 class TestTextUtilsMethod(TestCase):
-    def test_multiple_encodings(self):
-        test_string_to_encode = (
-            "BR,test_partner,123,Active,test_advertiser,123,"
-            "0,,test_io,123,Active,,test_line_item"
-            ',123,0,,"",0.00,41'
-        )
+    def test_get_report_generator__multiple_encodings(self):
+        test_string_to_encode = "2020-01-01,France,1234,10"
         lines = [
-            (
-                b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser"
-                b" ID,Advertiser Status,Advertiser Integration Code,Insertion"
-                b" Order,Insertion Order ID,Insertion Order Status,Insertion"
-                b" Order Integration Code,Line Item,Line Item ID,Line Item"
-                b" Status,Line Item Integration Code,Targeted Data Providers,"
-                b"Cookie Reach: Average Impression Frequency,Cookie Reach: "
-                b"Impression Reach"
-            ),
+            b"Date,Country,AdvertiserId,Impressions",
             test_string_to_encode.encode("utf-8"),
             test_string_to_encode.encode("ascii"),
             test_string_to_encode.encode("windows-1252"),
             test_string_to_encode.encode("latin_1"),
         ]
-        line_iterator_multiple_encodings = (line for line in lines)
-        expected_dict = {
-            "Country": "BR",
-            "Partner": "test_partner",
-            "Partner ID": "123",
-            "Partner Status": "Active",
-            "Advertiser": "test_advertiser",
-            "Advertiser ID": "123",
-            "Advertiser Status": "0",
-            "Advertiser Integration Code": "",
-            "Insertion Order": "test_io",
-            "Insertion Order ID": "123",
-            "Insertion Order Status": "Active",
-            "Insertion Order Integration Code": "",
-            "Line Item": "test_line_item",
-            "Line Item ID": "123",
-            "Line Item Status": "0",
-            "Line Item Integration Code": "",
-            "Targeted Data Providers": "",
-            "Cookie Reach: Average Impression Frequency": "0.00",
-            "Cookie Reach: Impression Reach": "41",
-        }
-        for yielded_dict in get_generator_dict_from_str_csv(line_iterator_multiple_encodings):
-            self.assertDictEqual(yielded_dict, expected_dict)
-
-    def test_blank_line(self):
-        lines = [
-            (
-                b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser"
-                b" ID,Advertiser Status,Advertiser Integration Code,Insertion"
-                b" Order,Insertion Order ID,Insertion Order Status,Insertion"
-                b" Order Integration Code,Line Item,Line Item ID,Line Item"
-                b" Status,Line Item Integration Code,Targeted Data Providers,"
-                b"Cookie Reach: Average Impression Frequency,Cookie Reach: "
-                b"Impression Reach"
-            ),
-            b"(Not desired last line) Total line: ,,,,,,,,,,100,100,100,100,100",
-            "",
-        ]
-        line_iterator_with_blank_line = (line for line in lines)
-        self.assertTrue(get_generator_dict_from_str_csv(line_iterator_with_blank_line))
-
-        lines.insert(
-            1,
-            (
-                b"BR,test_partner,123,Active,test_advertiser,123,"
-                b"0,,test_io,123,Active,,test_line_item"
-                b',123,0,,"",0.00,41'
-            ),
-        )
-        expected_dict = {
-            "Country": "BR",
-            "Partner": "test_partner",
-            "Partner ID": "123",
-            "Partner Status": "Active",
-            "Advertiser": "test_advertiser",
-            "Advertiser ID": "123",
-            "Advertiser Status": "0",
-            "Advertiser Integration Code": "",
-            "Insertion Order": "test_io",
-            "Insertion Order ID": "123",
-            "Insertion Order Status": "Active",
-            "Insertion Order Integration Code": "",
-            "Line Item": "test_line_item",
-            "Line Item ID": "123",
-            "Line Item Status": "0",
-            "Line Item Integration Code": "",
-            "Targeted Data Providers": "",
-            "Cookie Reach: Average Impression Frequency": "0.00",
-            "Cookie Reach: Impression Reach": "41",
+        line_iterator = iter(lines)
+        expected = {
+            "Date": "2020-01-01",
+            "Country": "France",
+            "AdvertiserId": "1234",
+            "Impressions": "10",
         }
-        line_iterator_with_blank_line = (line for line in lines)
-        for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line):
-            self.assertDictEqual(dic, expected_dict)
+        for output in get_report_generator_from_flat_file(line_iterator):
+            self.assertDictEqual(output, expected)

-        lines.append("This is something that should not be here.")
-        line_iterator_with_blank_line = (line for line in lines)
-        test_result = get_generator_dict_from_str_csv(line_iterator_with_blank_line)
-        self.assertEqual(len(list(test_result)), 1)
-        for dic in test_result:
-            self.assertEqual(dic, expected_dict)
-
-    def test_invalid_byte(self):
+    def test_get_report_generator__invalid_byte(self):
         lines = [
-            (
-                b"Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser"
-                b" ID,Advertiser Status,Advertiser Integration Code,Insertion"
-                b" Order,Insertion Order ID,Insertion Order Status,Insertion"
-                b" Order Integration Code,Line Item,Line Item ID,Line Item"
-                b" Status,Line Item Integration Code,Targeted Data Providers,"
-                b"Cookie Reach: Average Impression Frequency,Cookie Reach: "
-                b"Impression Reach"
-            ),
-            (
-                b"BR,test_partner,123,Active,test_advertiser,123,"
-                b"0,,test_io,123,Active,,test_line_item"
-                b',123,0,," \x91\xea\xd0$",0.00,41'
-            ),
+            b"Date,Country,AdvertiserId,Impressions",
+            b'2020-01-01," \x91\xea\xd0$",1234,10',
         ]
-        line_iterator_invalid_byte = (line for line in lines)
-        expected_dict = {
-            "Country": "BR",
-            "Partner": "test_partner",
-            "Partner ID": "123",
-            "Partner Status": "Active",
-            "Advertiser": "test_advertiser",
-            "Advertiser ID": "123",
-            "Advertiser Status": "0",
-            "Advertiser Integration Code": "",
-            "Insertion Order": "test_io",
-            "Insertion Order ID": "123",
-            "Insertion Order Status": "Active",
-            "Insertion Order Integration Code": "",
-            "Line Item": "test_line_item",
-            "Line Item ID": "123",
-            "Line Item Status": "0",
-            "Line Item Integration Code": "",
"Targeted Data Providers": " $", - "Cookie Reach: Average Impression Frequency": "0.00", - "Cookie Reach: Impression Reach": "41", + line_iterator = iter(lines) + expected = { + "Date": "2020-01-01", + "Country": " $", + "AdvertiserId": "1234", + "Impressions": "10", } - with self.assertLogs(level=logging.INFO) as cm: - for yielded_dict in get_generator_dict_from_str_csv(line_iterator_invalid_byte): - self.assertDictEqual(yielded_dict, expected_dict) + with self.assertLogs(level=logging.INFO) as log: + for output in get_report_generator_from_flat_file(line_iterator): + self.assertDictEqual(output, expected) self.assertEqual( - cm.output, + log.output, [ - "WARNING:root:An error has occurred while parsing the file. " + "WARNING:root:An error has occurred while parsing the file." "The line could not be decoded in utf-8." "Invalid input that the codec failed on: b'\\x91'" ], ) - def test_response_not_binary(self): - lines = [ - ( - "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser" - " ID,Advertiser Status,Advertiser Integration Code,Insertion" - " Order,Insertion Order ID,Insertion Order Status,Insertion" - " Order Integration Code,Line Item,Line Item ID,Line Item" - " Status,Line Item Integration Code,Targeted Data Providers," - "Cookie Reach: Average Impression Frequency,Cookie Reach: " - "Impression Reach" - ), - ( - "BR,test_partner,123,Active,test_advertiser,123," - "0,,test_io,123,Active,,test_line_item" - ',123,0,,"",0.00,41' - ), - ] - expected_dict = { - "Country": "BR", - "Partner": "test_partner", - "Partner ID": "123", - "Partner Status": "Active", - "Advertiser": "test_advertiser", - "Advertiser ID": "123", - "Advertiser Status": "0", - "Advertiser Integration Code": "", - "Insertion Order": "test_io", - "Insertion Order ID": "123", - "Insertion Order Status": "Active", - "Insertion Order Integration Code": "", - "Line Item": "test_line_item", - "Line Item ID": "123", - "Line Item Status": "0", - "Line Item Integration Code": "", - "Targeted Data Providers": "", - "Cookie Reach: Average Impression Frequency": "0.00", - "Cookie Reach: Impression Reach": "41", + def test_get_report_generator__no_bytes(self): + lines = ["Date,Country,AdvertiserId,Impressions", "2020-01-01,France,1234,10"] + line_iterator = iter(lines) + expected = { + "Date": "2020-01-01", + "Country": "France", + "AdvertiserId": "1234", + "Impressions": "10", } - line_iterator_with_blank_line = (line for line in lines) - for dic in get_generator_dict_from_str_csv(line_iterator_with_blank_line): - self.assertEqual(dic, expected_dict) + for output in get_report_generator_from_flat_file(line_iterator): + self.assertDictEqual(output, expected) - def test_line_parsing(self): - input_lines = ['abc, 1, 0.0, 4, "a,b,c", abc', '"abc", 1, 0.0, 4, "a,b,c", abc', "abc, 1, 0.0, 4, abc, abc"] - expected_outputs = [ + def test_parse_decoded_line(self): + lines = [ + 'abc, 1, 0.0, 4, "a,b,c", abc', + '"abc", 1, 0.0, 4, "a,b,c", abc', + "abc, 1, 0.0, 4, abc, abc", + ] + expected_lines = [ ["abc", "1", "0.0", "4", "a,b,c", "abc"], ["abc", "1", "0.0", "4", "a,b,c", "abc"], ["abc", "1", "0.0", "4", "abc", "abc"], ] - for index in range(len(input_lines)): - self.assertEqual(parse_decoded_line(input_lines[index]), expected_outputs[index]) - def test_response_not_binary_with_date(self): + for line, expected_line in zip(lines, expected_lines): + output_line = parse_decoded_line(line) + self.assertEqual(output_line, expected_line) + + def test_get_report_generator__add_column(self): lines = [ - ( - "Country,Partner,Partner 
+
+    def test_get_report_generator__add_column(self):
         lines = [
-            (
-                "Country,Partner,Partner ID,Partner Status,Advertiser,Advertiser"
-                " ID,Advertiser Status,Advertiser Integration Code,Insertion"
-                " Order,Insertion Order ID,Insertion Order Status,Insertion"
-                " Order Integration Code,Line Item,Line Item ID,Line Item"
-                " Status,Line Item Integration Code,Targeted Data Providers,"
-                "Cookie Reach: Average Impression Frequency,Cookie Reach: "
-                "Impression Reach"
-            ),
-            (
-                "BR,test_partner,123,Active,test_advertiser,123,"
-                "0,,test_io,123,Active,,test_line_item"
-                ',123,0,,"",0.00,41'
-            ),
+            b"Date,AdvertiserId,Reach",
+            b"2020-01-01,1234,1000",
+            b"2020-01-01,5678,2000",
         ]
-        expected_dict = {
-            "Country": "BR",
-            "Partner": "test_partner",
-            "Partner ID": "123",
-            "Partner Status": "Active",
-            "Advertiser": "test_advertiser",
-            "Advertiser ID": "123",
-            "Advertiser Status": "0",
-            "Advertiser Integration Code": "",
-            "Insertion Order": "test_io",
-            "Insertion Order ID": "123",
-            "Insertion Order Status": "Active",
-            "Insertion Order Integration Code": "",
-            "Line Item": "test_line_item",
-            "Line Item ID": "123",
-            "Line Item Status": "0",
-            "Line Item Integration Code": "",
-            "Targeted Data Providers": "",
-            "Cookie Reach: Average Impression Frequency": "0.00",
-            "Cookie Reach: Impression Reach": "41",
-            "date_start": "2020/01/01",
-            "date_stop": "2020/01/31",
-        }
+        expected = [
+            {
+                "Date": "2020-01-01",
+                "AdvertiserId": "1234",
+                "Reach": "1000",
+                "Campaign": "XMas Sale",
+                "Country": "France",
+            },
+            {
+                "Date": "2020-01-01",
+                "AdvertiserId": "5678",
+                "Reach": "2000",
+                "Campaign": "XMas Sale",
+                "Country": "France",
+            },
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator,
+            add_column=True,
+            column_dict={"Campaign": "XMas Sale", "Country": "France"},
+        )
-        line_iterator_with_blank_line = (line for line in lines)
-        with patch("nck.utils.date_handler.date") as mock_date:
-            mock_date.today.return_value = date(2020, 2, 1)
-            mock_date.side_effect = lambda *args, **kw: date(*args, **kw)
-            for dic in get_generator_dict_from_str_csv(
-                line_iterator_with_blank_line, add_date=True, day_range="PREVIOUS_MONTH", date_format="%Y/%m/%d"
-            ):
-                self.assertEqual(dic, expected_dict)
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)

-    def test_csv_with_headers_only(self):
-        input_report = (row for row in [b"Just,Headers,in,this,empty,report"])
+    def test_get_report_generator__file_with_headers_only(self):
+        lines = [b"Just,Headers,in,this,empty,report"]
+        line_iterator = iter(lines)
         self.assertFalse(
-            next(get_generator_dict_from_str_csv(input_report, skip_last_row=False), False), "Data is not empty"
+            next(get_report_generator_from_flat_file(line_iterator), False),
+            "Data is not empty",
         )
+
+    def test_get_report_generator__skip_when_no_match_with_headers_length(self):
+        lines = [
+            b"Date,AdvertiserId,Impressions",
+            b"2020-01-01,1234,10",
+            b"2020-01-01,5678,20",
+            b"Copyright: report downloaded from Artefact.com",
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator, skip_n_first=0, skip_n_last=0
+        )
+        expected = [
+            {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+            {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"},
+        ]
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)
+
+    def test_get_report_generator__skip_blank(self):
+        lines = [
+            b"Date,AdvertiserId,Impressions",
+            b"2020-01-01,1234,10",
+            b"",
+            b"2020-01-01,5678,20",
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator, skip_n_first=0, skip_n_last=0
+        )
+        expected = [
+            {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+            {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"},
+        ]
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)
+
+    def test_get_report_generator__skip_first_and_last(self):
+        lines = [
+            b"(Not desired first line)",
+            b"(Not desired second line)",
+            b"Date,AdvertiserId,Impressions",
+            b"2020-01-01,1234,10",
+            b"2020-01-01,5678,20",
+            b"(Not desired last line)",
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator, skip_n_first=2, skip_n_last=1
+        )
+        expected = [
+            {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+            {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"},
+        ]
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)
+
+    def test_get_report_generator__skip_last_with_blank_at_end_of_file(self):
+        lines = [
+            b"Date,AdvertiserId,Impressions",
+            b"2020-01-01,1234,10",
+            b"2020-01-01,5678,20",
+            b"(Not desired last line)",
+            b"",
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator, skip_n_first=0, skip_n_last=1
+        )
+        expected = [
+            {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+            {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"},
+        ]
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)

-    @parameterized.expand(
-        [
-            (
-                True,
-                [
-                    b'"Perf report (2017-03-01 - 2020-03-25)"',
-                    b"AdFormat\tAdGroupId\tAdGroupName",
-                    b"IMAGE\t123\tAdGroup",
-                    b"IMAGE\t123\tAdGroup",
-                ],
-            ),
-            (False, [b"AdFormat\tAdGroupId\tAdGroupName", b"IMAGE\t123\tAdGroup", b"IMAGE\t123\tAdGroup"]),
+    def test_get_report_generator__skip_no_first_nor_last(self):
+        lines = [
+            b"Date,AdvertiserId,Impressions",
+            b"2020-01-01,1234,10",
+            b"2020-01-01,5678,20",
+        ]
+        line_iterator = iter(lines)
+        output = get_report_generator_from_flat_file(
+            line_iterator, skip_n_first=0, skip_n_last=0
+        )
+        expected = [
+            {"Date": "2020-01-01", "AdvertiserId": "1234", "Impressions": "10"},
+            {"Date": "2020-01-01", "AdvertiserId": "5678", "Impressions": "20"},
         ]
-    )
-    def test_parse_tsv_with_first_row_skipped(self, skip_first_row, lines):
-        expected_dict = {"AdFormat": "IMAGE", "AdGroupId": "123", "AdGroupName": "AdGroup"}
-        line_iterator = (line for line in lines)
-        for dic in get_generator_dict_from_str_tsv(line_iterator, skip_first_row=skip_first_row):
-            self.assertEqual(dic, expected_dict)
+        for output_record, expected_record in zip(output, expected):
+            self.assertEqual(output_record, expected_record)
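+
+    # Editor's note: taken together, these tests assume a flat-file report
+    # generator along these lines (signature inferred from the calls above;
+    # the defaults shown are guesses, not guaranteed to match the helper):
+    #
+    #     get_report_generator_from_flat_file(
+    #         line_iterator,      # iterator of str or bytes lines
+    #         skip_n_first=0,     # leading lines to drop before the header
+    #         skip_n_last=0,      # trailing lines to drop
+    #         add_column=False,   # if True, merge column_dict into each record
+    #         column_dict={},     # constant columns added to every record
+    #     )
+    #
+    # Blank lines and lines whose field count does not match the header are
+    # skipped, and each record is yielded as a dict of header name to value
+    # string.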