From 45da0b35de1ead462307fc8f1f495087a92ab9fa Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 27 Sep 2024 17:12:37 -0700 Subject: [PATCH] added all three configurations to outlinks ipynb (#3202) --- .../anvil-analytics-combined/constants.py | 19 ++ .../portal-outlinks.ipynb | 287 ++++++++++++++++++ .../portal-outlinks.ipynb | 104 ------- 3 files changed, 306 insertions(+), 104 deletions(-) create mode 100644 analytics/anvil-analytics-combined/constants.py create mode 100644 analytics/anvil-analytics-combined/portal-outlinks.ipynb delete mode 100644 analytics/anvil-analytics-portal/portal-outlinks.ipynb diff --git a/analytics/anvil-analytics-combined/constants.py b/analytics/anvil-analytics-combined/constants.py new file mode 100644 index 00000000..52aabce7 --- /dev/null +++ b/analytics/anvil-analytics-combined/constants.py @@ -0,0 +1,19 @@ +ANVIL_PORTAL_NAME = "anvil-portal" +ANVIL_CATALOG_NAME = "anvil-catalog" +ANVIL_EXPLORER_NAME = "anvil-explorer" + +PROPERTY_ID_MAP = { + ANVIL_CATALOG_NAME: "368661710", + ANVIL_EXPLORER_NAME: "383267328", + ANVIL_PORTAL_NAME: "368678391", +} + +SECRET_NAME = 'GA4_CREDENTIALS' +GA_PROPERTY_PORTAL = "368678391" # AnVIL Portal - GA4 +PRE_AUDIENCE_EXCLUDE_PAGES_FILTER = "landingPagePlusQueryString!=/guides/content/creating-links" +EXCLUDE_PAGES_FILTER = {"filter": {"fieldName": "audienceId", "numericFilter": {"operation": "EQUAL", "value": {"doubleValue": 5559548544}}}} +ANALYTICS_START = "2021-01-01" +TODAY = 'today' +THIRTY_DAYS_AGO = '30daysAgo' + +OAUTH_PORT = 8082 \ No newline at end of file diff --git a/analytics/anvil-analytics-combined/portal-outlinks.ipynb b/analytics/anvil-analytics-combined/portal-outlinks.ipynb new file mode 100644 index 00000000..397bb46c --- /dev/null +++ b/analytics/anvil-analytics-combined/portal-outlinks.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: GA4_CREDENTIALS=../../../analytics_keys/ga4_key_test.json\n" + ] + } + ], + "source": [ + "# Add credentials environment variable here\n", + "# Example:\n", + "#%env GA4_CREDENTIALS=your/path/to/your/credentials.json\n", + "%env GA4_CREDENTIALS=../../../analytics_keys/ga4_key_test.json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import analytics.charts as ac\n", + "import analytics.api as ga\n", + "import pandas as pd\n", + "from constants import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=425030666072-vun85q7nt3038skng8gs0f03juh97e17.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8082%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fanalytics.readonly&state=9yQGXQsk0VZyaaqUuCVtkfw6Yjsi9T&access_type=offline\n" + ] + } + ], + "source": [ + "default_params = {\n", + " \"service_system\": ac.authenticate_ga(SECRET_NAME, ga.ga4_service_params, port=OAUTH_PORT),\n", + "}\n", + "outlink_search_params = {\n", + " \"start_date\": '30daysAgo',\n", + " \"end_date\": TODAY,\n", + " \"dimensionFilter\": {\n", + " \"filter\": {\n", + " \"stringFilter\": {\n", + " \"matchType\": \"EXACT\",\n", + " \"value\": \"true\"\n", + " },\n", + " \"fieldName\": \"outbound\"\n", + " }\n", + " }\n", + "}\n", + "catalog_params = {\n", + " **default_params,\n", + " **outlink_search_params,\n", + " \"property\": PROPERTY_ID_MAP[ANVIL_CATALOG_NAME],\n", + "}\n", + "explorer_params = {\n", + " **default_params,\n", + " **outlink_search_params,\n", + " \"property\": PROPERTY_ID_MAP[ANVIL_EXPLORER_NAME],\n", + "}\n", + "portal_params = {\n", + " **default_params,\n", + " **outlink_search_params,\n", + " \"base_dimension_filter\": EXCLUDE_PAGES_FILTER,\n", + " \"property\": PROPERTY_ID_MAP[ANVIL_PORTAL_NAME],\n", + "}\n", + "\n", + "outlink_params_for_each_property = {\n", + " ANVIL_PORTAL_NAME: portal_params,\n", + " ANVIL_CATALOG_NAME: catalog_params,\n", + " ANVIL_EXPLORER_NAME: explorer_params,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "anvil-portal\n", + "anvil-catalog\n", + "anvil-explorer\n" + ] + } + ], + "source": [ + "results_dict = {}\n", + "\n", + "for key in outlink_params_for_each_property:\n", + " print(key)\n", + " df_outlinks = (ac.get_data_df(\n", + " [\"eventCount\"],\n", + " [\"pagePath\", \"linkDomain\", \"linkUrl\"], \n", + " **outlink_params_for_each_property[key], \n", + " df_processor=lambda df: df.loc[df.index.get_level_values(1) != \"\"]\n", + " )).rename_axis(\n", + " index={\"pagePath\": \"Current Page\", \"linkDomain\": \"Outlink Domain\", \"linkUrl\": \"Outlink URL\"}\n", + " )\n", + " results_dict[key] = df_outlinks\n", + "\n", + "with pd.ExcelWriter(\"outlinks_summary.xlsx\") as writer:\n", + " for key in results_dict:\n", + " results_dict[key].to_excel(writer, sheet_name=key)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
eventCount
Current PageOutlink DomainOutlink URL
/data/consortia/GTEx/workspacesanvil.terra.biohttps://anvil.terra.bio/26
/data/consortia/HPRC/workspacesanvil.terra.biohttps://anvil.terra.bio/18
/data/consortia/GREGoRgregorconsortium.orghttps://gregorconsortium.org/data15
/data/studies/phs001642/workspacesanvil.terra.biohttps://anvil.terra.bio/14
/data/studies/phs000424/workspacesanvil.terra.biohttps://anvil.terra.bio/13
............
/data/studies/phs002378ncbi.nlm.nih.govhttps://www.ncbi.nlm.nih.gov/books/NBK5295/1
https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs0010261
/data/studies/phs003047dbgap.ncbi.nlm.nih.govhttps://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset=phs0030471
/data/studies/phs003181/workspacesanvil.terra.biohttps://anvil.terra.bio/1
/data/studies/phs003184ncbi.nlm.nih.govhttps://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs003184.v1.p11
\n", + "

61 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " eventCount\n", + "Current Page Outlink Domain Outlink URL \n", + "/data/consortia/GTEx/workspaces anvil.terra.bio https://anvil.terra.bio/ 26\n", + "/data/consortia/HPRC/workspaces anvil.terra.bio https://anvil.terra.bio/ 18\n", + "/data/consortia/GREGoR gregorconsortium.org https://gregorconsortium.org/data 15\n", + "/data/studies/phs001642/workspaces anvil.terra.bio https://anvil.terra.bio/ 14\n", + "/data/studies/phs000424/workspaces anvil.terra.bio https://anvil.terra.bio/ 13\n", + "... ...\n", + "/data/studies/phs002378 ncbi.nlm.nih.gov https://www.ncbi.nlm.nih.gov/books/NBK5295/ 1\n", + " https://www.ncbi.nlm.nih.gov/projects/gap/cgi-b... 1\n", + "/data/studies/phs003047 dbgap.ncbi.nlm.nih.gov https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?addda... 1\n", + "/data/studies/phs003181/workspaces anvil.terra.bio https://anvil.terra.bio/ 1\n", + "/data/studies/phs003184 ncbi.nlm.nih.gov https://www.ncbi.nlm.nih.gov/projects/gap/cgi-b... 1\n", + "\n", + "[61 rows x 1 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results_dict[ANVIL_CATALOG_NAME]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analytics/anvil-analytics-portal/portal-outlinks.ipynb b/analytics/anvil-analytics-portal/portal-outlinks.ipynb deleted file mode 100644 index 169f40f8..00000000 --- a/analytics/anvil-analytics-portal/portal-outlinks.ipynb +++ /dev/null @@ -1,104 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import analytics.charts as ac\n", - "import analytics_anvil as anvil\n", - "\n", - "\n", - "SECRET_NAME = 'GA4_CREDENTIALS'\n", - "GA_PROPERTY_PORTAL = \"368678391\" # AnVIL Portal - GA4\n", - "PRE_AUDIENCE_EXCLUDE_PAGES_FILTER = \"landingPagePlusQueryString!=/guides/content/creating-links\"\n", - "EXCLUDE_PAGES_FILTER = {\"filter\": {\"fieldName\": \"audienceId\", \"numericFilter\": {\"operation\": \"EQUAL\", \"value\": {\"doubleValue\": 5559548544}}}}\n", - "ANALYTICS_START = \"2021-01-01\"\n", - "TODAY = 'today'\n", - "PERIOD = \"2024-08\"\n", - "PREV_PERIOD = \"2024-07\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "default_params_portal = {\n", - " \"service_system\": anvil.authenticate_ga_portal(SECRET_NAME),\n", - " \"property\": GA_PROPERTY_PORTAL,\n", - " \"index_key_formatter\": anvil.adjust_table_index_key,\n", - " \"base_dimension_filter\": EXCLUDE_PAGES_FILTER\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "default_params_portal_with_date_and_filter = {\n", - " **default_params_portal,\n", - " \"start_date\": '30daysAgo',\n", - " \"end_date\": TODAY,\n", - " \"dimensionFilter\": {\n", - " \"filter\": {\n", - " \"stringFilter\": {\n", - " \"matchType\": \"EXACT\",\n", - " \"value\": \"true\"\n", - " },\n", - " \"fieldName\": \"outbound\"\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df_outlink_domains = ac.get_data_df(\n", - " [\"eventCount\"],\n", - " [\"linkDomain\", \"linkUrl\"], \n", - " **default_params_portal_with_date_and_filter, \n", - " df_processor=lambda df: df.loc[df.index.get_level_values(0) != \"\"]\n", - ")\n", - "df_outlink_domains.to_csv(\"outlink_domains.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}