From f3cebf6725ae79c5f96822476eded52001dc8083 Mon Sep 17 00:00:00 2001 From: Eric Boucher Date: Tue, 25 Jul 2023 17:19:42 +0200 Subject: [PATCH 1/8] add optional do_not_paginate parameter --- tap_github/repository_streams.py | 23 +++++++++++++++++++++-- tap_github/streams.py | 2 ++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 2e03497c..b44a9854 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2191,6 +2191,7 @@ class DependenciesStream(GitHubGraphqlStream): parent_stream_type = RepositoryStream state_partitioning_keys = ["repo_id"] ignore_parent_replication_key = True + do_not_paginate = False @property def http_headers(self) -> dict: @@ -2220,7 +2221,7 @@ def query(self) -> str: """Return dynamic GraphQL query.""" # Graphql id is equivalent to REST node_id. To keep the tap consistent, we rename "id" to "node_id". # Due to GrapQl nested-pagination limitations, we loop through the top level dependencyGraphManifests one by one. - return """ + initial_query = """ query repositoryDependencies($repo: String! $org: String! 
$nextPageCursor_0: String $nextPageCursor_1: String) { repository(name: $repo owner: $org) { dependencyGraphManifests (first: 1 withDependencies: true after: $nextPageCursor_0) { @@ -2263,9 +2264,21 @@ def query(self) -> str: cost } } - """ + if self.do_not_paginate: + no_pagination_query = initial_query.replace( + " $nextPageCursor_0: String $nextPageCursor_1: String", "" + ) + no_pagination_query = no_pagination_query.replace("after: $nextPageCursor_0", "") + no_pagination_query = no_pagination_query.replace("after: $nextPageCursor_1", "") + no_pagination_query = no_pagination_query.replace("first: 1", "first: 10") + no_pagination_query = no_pagination_query.replace("first: 50", "first: 100") + + return no_pagination_query + + return initial_query + schema = th.PropertiesList( # Parent Keys th.Property("repo", th.StringType), @@ -2296,6 +2309,12 @@ def query(self) -> str: ).to_dict() +class DependenciesStreamIncomplete(DependenciesStream): + """Defines 'DependenciesStreamDirty' stream to limit pagination.""" + do_not_paginate = True + + + class TrafficRestStream(GitHubRestStream): """Base class for Traffic Streams""" diff --git a/tap_github/streams.py b/tap_github/streams.py index e1b05e58..5f71d809 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -18,6 +18,7 @@ CommunityProfileStream, ContributorsStream, DependenciesStream, + DependenciesStreamIncomplete, DependentsStream, EventsStream, ExtraMetricsStream, @@ -75,6 +76,7 @@ def __init__(self, valid_queries: Set[str], streams: List[Type[Stream]]): CommunityProfileStream, ContributorsStream, DependenciesStream, + DependenciesStreamIncomplete, DependentsStream, EventsStream, IssueCommentsStream, From 6a38618dce77def3f1c4f139ceeef694e54eadf9 Mon Sep 17 00:00:00 2001 From: Eric Boucher Date: Tue, 25 Jul 2023 17:22:07 +0200 Subject: [PATCH 2/8] client option do not paginate --- tap_github/client.py | 3 +++ tap_github/repository_streams.py | 15 +-------------- 2 files changed, 4 insertions(+), 14 
deletions(-) diff --git a/tap_github/client.py b/tap_github/client.py index 77f07c39..98fabc33 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -334,6 +334,9 @@ def get_next_page_token( Warning - we recommend to avoid using deep (nested) pagination. """ + if self.do_not_paginate: + return None + resp_json = response.json() # Find if results contains "hasNextPage_X" flags and if any are True. diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index b44a9854..7e0e0046 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2191,7 +2191,6 @@ class DependenciesStream(GitHubGraphqlStream): parent_stream_type = RepositoryStream state_partitioning_keys = ["repo_id"] ignore_parent_replication_key = True - do_not_paginate = False @property def http_headers(self) -> dict: @@ -2221,7 +2220,7 @@ def query(self) -> str: """Return dynamic GraphQL query.""" # Graphql id is equivalent to REST node_id. To keep the tap consistent, we rename "id" to "node_id". # Due to GrapQl nested-pagination limitations, we loop through the top level dependencyGraphManifests one by one. - initial_query = """ + return """ query repositoryDependencies($repo: String! $org: String! 
$nextPageCursor_0: String $nextPageCursor_1: String) { repository(name: $repo owner: $org) { dependencyGraphManifests (first: 1 withDependencies: true after: $nextPageCursor_0) { @@ -2266,18 +2265,6 @@ def query(self) -> str: } """ - if self.do_not_paginate: - no_pagination_query = initial_query.replace( - " $nextPageCursor_0: String $nextPageCursor_1: String", "" - ) - no_pagination_query = no_pagination_query.replace("after: $nextPageCursor_0", "") - no_pagination_query = no_pagination_query.replace("after: $nextPageCursor_1", "") - no_pagination_query = no_pagination_query.replace("first: 1", "first: 10") - no_pagination_query = no_pagination_query.replace("first: 50", "first: 100") - - return no_pagination_query - - return initial_query schema = th.PropertiesList( # Parent Keys From 8415f6de0e056bcf1b787e6ae6cf49845e2b0df9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:23:12 +0000 Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tap_github/repository_streams.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 7e0e0046..67c26f72 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2265,7 +2265,6 @@ def query(self) -> str: } """ - schema = th.PropertiesList( # Parent Keys th.Property("repo", th.StringType), @@ -2298,8 +2297,8 @@ def query(self) -> str: class DependenciesStreamIncomplete(DependenciesStream): """Defines 'DependenciesStreamDirty' stream to limit pagination.""" - do_not_paginate = True + do_not_paginate = True class TrafficRestStream(GitHubRestStream): From b126e3836c3c039fa8ff19bc8bc65672150c9625 Mon Sep 17 00:00:00 2001 From: Eric Boucher Date: Tue, 25 Jul 2023 17:25:22 +0200 Subject: [PATCH 4/8] adapt pagination --- tap_github/repository_streams.py | 7 +++++++ 
1 file changed, 7 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 7e0e0046..5d0c11ef 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2300,6 +2300,13 @@ class DependenciesStreamIncomplete(DependenciesStream): """Defines 'DependenciesStreamDirty' stream to limit pagination.""" do_not_paginate = True + @property + def query(self) -> str: + """""" + initial_query = super().query + no_pagination_query = initial_query.replace("first: 1", "first: 20") + no_pagination_query = no_pagination_query.replace("first: 50", "first: 100") + return no_pagination_query class TrafficRestStream(GitHubRestStream): From 02e07622f6a9322c12a3178a883836c3831c0050 Mon Sep 17 00:00:00 2001 From: Eric Boucher Date: Tue, 25 Jul 2023 17:25:55 +0200 Subject: [PATCH 5/8] Update repository_streams.py --- tap_github/repository_streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 5d0c11ef..41e652f6 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2302,7 +2302,7 @@ class DependenciesStreamIncomplete(DependenciesStream): @property def query(self) -> str: - """""" + """Return altered query to limit pagination.""" initial_query = super().query no_pagination_query = initial_query.replace("first: 1", "first: 20") no_pagination_query = no_pagination_query.replace("first: 50", "first: 100") From 7f27c30df29aa9ee5ad01388d7b15d0cea3249b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:26:55 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tap_github/repository_streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index
bc8bf741..7a522017 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -2265,7 +2265,6 @@ def query(self) -> str: } """ - schema = th.PropertiesList( # Parent Keys th.Property("repo", th.StringType), @@ -2298,6 +2297,7 @@ def query(self) -> str: class DependenciesStreamIncomplete(DependenciesStream): """Defines 'DependenciesStreamDirty' stream to limit pagination.""" + do_not_paginate = True @property From 1070f8c6c77b9dc660eaf6340769db43e343d4e5 Mon Sep 17 00:00:00 2001 From: Eric Boucher Date: Tue, 25 Jul 2023 19:02:24 +0200 Subject: [PATCH 7/8] Update client.py --- tap_github/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/client.py b/tap_github/client.py index 98fabc33..9b61ddcc 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -334,7 +334,7 @@ def get_next_page_token( Warning - we recommend to avoid using deep (nested) pagination. """ - if self.do_not_paginate: + if hasattr(self, 'do_not_paginate') and self.do_not_paginate: return None resp_json = response.json() From 81e7f4961950fc0bb86f219f29b7f0b8a1fc37eb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jul 2023 17:02:37 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tap_github/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/client.py b/tap_github/client.py index 9b61ddcc..749963a6 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -334,7 +334,7 @@ def get_next_page_token( Warning - we recommend to avoid using deep (nested) pagination. """ - if hasattr(self, 'do_not_paginate') and self.do_not_paginate: + if hasattr(self, "do_not_paginate") and self.do_not_paginate: return None resp_json = response.json()