From fc5c1b98a7c12a15f22bf0beeca41339a6986456 Mon Sep 17 00:00:00 2001 From: "tidb-cloud-data-service[bot]" <134662589+tidb-cloud-data-service[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 03:22:14 +0000 Subject: [PATCH] public-api: add exclude_unknown parameter for org/area API --- configs/public_api/http_endpoints/config.json | 50 +++++++++++++++++-- .../GET-repos-issue_creators-countries.sql | 11 ++-- ...GET-repos-issue_creators-organizations.sql | 7 ++- ...-repos-pull_request_creators-countries.sql | 11 ++-- ...os-pull_request_creators-organizations.sql | 7 ++- .../sql/GET-repos-stargazers-countries.sql | 9 ++-- .../GET-repos-stargazers-organizations.sql | 7 ++- .../http_endpoints/sql/GET-trends-repos.sql | 2 +- 8 files changed, 82 insertions(+), 22 deletions(-) diff --git a/configs/public_api/http_endpoints/config.json b/configs/public_api/http_endpoints/config.json index 0ec5e560c4f..59a6ec2a8d5 100644 --- a/configs/public_api/http_endpoints/config.json +++ b/configs/public_api/http_endpoints/config.json @@ -324,6 +324,13 @@ "required": 0, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "string", + "required": 0, + "default": "", + "description": "" } ], "settings": { @@ -372,6 +379,13 @@ "required": 0, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "string", + "required": 0, + "default": "", + "description": "" } ], "settings": { @@ -397,15 +411,15 @@ "name": "language", "type": "string", "required": 0, - "default": "All", - "description": "Specify the period of time to calculate trending repos." + "default": "", + "description": "" }, { "name": "period", "type": "string", "required": 0, - "default": "past_24_hours", - "description": "Specify using which programming language to filter trending repos. If not specified, all languages will be included." + "default": "", + "description": "" } ], "settings": { @@ -500,6 +514,13 @@ "required": 0, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "boolean", + "required": 0, + "default": "true", + "description": "" } ], "settings": { @@ -548,6 +569,13 @@ "required": 1, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "boolean", + "required": 0, + "default": "true", + "description": "" } ], "settings": { @@ -596,6 +624,13 @@ "required": 0, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "boolean", + "required": 0, + "default": "true", + "description": "" } ], "settings": { @@ -644,6 +679,13 @@ "required": 0, "default": "2099-12-31", "description": "" + }, + { + "name": "exclude_unknown", + "type": "string", + "required": 0, + "default": "", + "description": "" } ], "settings": { diff --git a/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-countries.sql b/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-countries.sql index b7a2d8e3b52..fc46ef9285d 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-countries.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-countries.sql @@ -2,18 +2,21 @@ USE gharchive_dev; WITH group_by_countries AS ( SELECT - gu.country_code, - COUNT(1) as issue_creators + CASE + WHEN TRIM(gu.country_code) IN ('', 'N/A', 'UND') OR gu.country_code IS NULL THEN 'UNKNOWN' + ELSE gu.country_code + END AS country_code, + COUNT(DISTINCT actor_login) as issue_creators FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login WHERE repo_id = (SELECT repo_id FROM github_repos WHERE repo_name = CONCAT(${owner}, '/', ${repo}) LIMIT 1) AND ge.type = 'IssuesEvent' AND ge.action = 'opened' - AND gu.country_code NOT IN ('', 'N/A', 'UND') AND ge.created_at >= ${from} AND ge.created_at <= ${to} - GROUP BY country_code + AND IF(${exclude_unknown} = TRUE, gu.country_code NOT IN ('', 'N/A', 'UND'), TRUE) + GROUP BY 1 ), summary AS ( SELECT SUM(issue_creators) AS total FROM group_by_countries ) diff --git a/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-organizations.sql b/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-organizations.sql index 7e49f15340e..fee03dc3db0 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-organizations.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-issue_creators-organizations.sql @@ -2,7 +2,10 @@ USE gharchive_dev; WITH group_by_org AS ( SELECT - LOWER(REPLACE(gu.organization, '@', '')) AS org_name, + CASE + WHEN (TRIM(gu.organization) = '' OR gu.organization IS NULL) THEN 'UNKNOWN' + ELSE LOWER(REPLACE(gu.organization, '@', '')) + END AS org_name, COUNT(DISTINCT ge.actor_login) AS issue_creators FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login @@ -12,7 +15,7 @@ WITH group_by_org AS ( AND ge.action = 'opened' AND ge.created_at >= ${from} AND ge.created_at <= ${to} - AND gu.organization != '' + AND IF(${exclude_unknown} = TRUE, gu.organization != '', TRUE) GROUP BY org_name ), summary AS ( SELECT SUM(issue_creators) AS total FROM group_by_org diff --git a/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-countries.sql b/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-countries.sql index ef9341a3951..79559fd78e0 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-countries.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-countries.sql @@ -2,18 +2,21 @@ USE gharchive_dev; WITH group_by_countries AS ( SELECT - gu.country_code, - COUNT(1) as pull_request_creators + CASE + WHEN TRIM(gu.country_code) IN ('', 'N/A', 'UND') OR gu.country_code IS NULL THEN 'UNKNOWN' + ELSE gu.country_code + END AS country_code, + COUNT(DISTINCT actor_login) as pull_request_creators FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login WHERE repo_id = (SELECT repo_id FROM github_repos WHERE repo_name = CONCAT(${owner}, '/', ${repo}) LIMIT 1) AND ge.type = 'PullRequestEvent' AND ge.action = 'opened' - AND gu.country_code NOT IN ('', 'N/A', 'UND') AND ge.created_at >= ${from} AND ge.created_at <= ${to} - GROUP BY country_code + AND IF(${exclude_unknown} = TRUE, gu.country_code NOT IN ('', 'N/A', 'UND'), TRUE) + GROUP BY 1 ), summary AS ( SELECT SUM(pull_request_creators) AS total FROM group_by_countries ) diff --git a/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-organizations.sql b/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-organizations.sql index 59899f2cd4f..06021f765d1 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-organizations.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-pull_request_creators-organizations.sql @@ -2,7 +2,10 @@ USE gharchive_dev; WITH group_by_org AS ( SELECT - LOWER(REPLACE(gu.organization, '@', '')) AS org_name, + CASE + WHEN (TRIM(gu.organization) IN ('', 'n/a') OR gu.organization IS NULL) THEN 'UNKNOWN' + ELSE LOWER(REPLACE(gu.organization, '@', '')) + END AS org_name, COUNT(DISTINCT ge.actor_login) AS pull_request_creators FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login @@ -12,7 +15,7 @@ WITH group_by_org AS ( AND ge.action = 'opened' AND ge.created_at >= ${from} AND ge.created_at <= ${to} - AND gu.organization != '' + AND IF(${exclude_unknown} = TRUE, gu.organization != '', TRUE) GROUP BY org_name ), summary AS ( SELECT SUM(pull_request_creators) AS total FROM group_by_org diff --git a/configs/public_api/http_endpoints/sql/GET-repos-stargazers-countries.sql b/configs/public_api/http_endpoints/sql/GET-repos-stargazers-countries.sql index b0739703e97..a0bbd8ae2db 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-stargazers-countries.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-stargazers-countries.sql @@ -2,7 +2,10 @@ USE gharchive_dev; WITH group_by_countries AS ( SELECT - gu.country_code, + CASE + WHEN TRIM(gu.country_code) IN ('', 'N/A', 'UND') OR gu.country_code IS NULL THEN 'UNKNOWN' + ELSE gu.country_code + END AS country_code, COUNT(1) as stargazers FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login @@ -10,10 +13,10 @@ WITH group_by_countries AS ( repo_id = (SELECT repo_id FROM github_repos WHERE repo_name = CONCAT(${owner}, '/', ${repo}) LIMIT 1) AND ge.type = 'WatchEvent' AND ge.action = 'started' - AND gu.country_code NOT IN ('', 'N/A', 'UND') AND ge.created_at >= ${from} AND ge.created_at <= ${to} - GROUP BY country_code + AND IF(${exclude_unknown} = TRUE, gu.country_code NOT IN ('', 'N/A', 'UND'), TRUE) + GROUP BY 1 ), summary AS ( SELECT SUM(stargazers) AS total FROM group_by_countries ) diff --git a/configs/public_api/http_endpoints/sql/GET-repos-stargazers-organizations.sql b/configs/public_api/http_endpoints/sql/GET-repos-stargazers-organizations.sql index a786d1074e1..9f307d4be9c 100644 --- a/configs/public_api/http_endpoints/sql/GET-repos-stargazers-organizations.sql +++ b/configs/public_api/http_endpoints/sql/GET-repos-stargazers-organizations.sql @@ -2,7 +2,10 @@ USE gharchive_dev; WITH group_by_org AS ( SELECT - LOWER(REPLACE(gu.organization, '@', '')) AS org_name, + CASE + WHEN (TRIM(gu.organization) = '' OR gu.organization IS NULL) THEN 'UNKNOWN' + ELSE LOWER(REPLACE(gu.organization, '@', '')) + END AS org_name, COUNT(DISTINCT ge.actor_login) AS stargazers FROM github_events ge LEFT JOIN github_users gu ON ge.actor_login = gu.login @@ -12,7 +15,7 @@ WITH group_by_org AS ( AND ge.action = 'started' AND ge.created_at >= ${from} AND ge.created_at <= ${to} - AND gu.organization != '' + AND IF(${exclude_unknown} = TRUE, gu.organization != '', TRUE) GROUP BY org_name ), summary AS ( SELECT SUM(stargazers) AS total FROM group_by_org diff --git a/configs/public_api/http_endpoints/sql/GET-trends-repos.sql b/configs/public_api/http_endpoints/sql/GET-trends-repos.sql index ef7411d82ca..10e12feb699 100644 --- a/configs/public_api/http_endpoints/sql/GET-trends-repos.sql +++ b/configs/public_api/http_endpoints/sql/GET-trends-repos.sql @@ -40,4 +40,4 @@ SELECT r.*, rc.collection_names FROM repos r -LEFT JOIN repo_with_collections rc ON r.repo_id = rc.repo_id \ No newline at end of file +LEFT JOIN repo_with_collections rc ON r.repo_id = rc.repo_id; \ No newline at end of file