feat: enable fetching retried jobs #107

Merged
README.md (5 changes: 4 additions & 1 deletion)
@@ -60,7 +60,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git
Create a JSON file called `config.json` containing:
- Access token you just created
- API URL for your GitLab account. If you are using the public gitlab.com this will be `https://gitlab.com/api/v4`
- Groups to track (space separated)
- Groups to track (space separated)
- Projects to track (space separated)

Notes on group and project options:
@@ -79,6 +79,7 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git
"ultimate_license": true,
"fetch_merge_request_commits": false,
"fetch_pipelines_extended": false,
"fetch_retried_jobs": false,
"fetch_group_variables": false,
"fetch_project_variables": false
}
@@ -92,6 +93,8 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git

If `fetch_pipelines_extended` is true (defaults to false), then for every Pipeline fetched with `sync_pipelines` (which returns N pages containing all pipelines per project), also fetch extended details of each of these pipelines with `sync_pipelines_extended`. Similar concerns as those related to `fetch_merge_request_commits` apply here - every pipeline fetched with `sync_pipelines_extended` requires a separate API call.

If `fetch_retried_jobs` is true (defaults to false), then retried jobs are also included when fetching the jobs of each pipeline (via the `include_retried` parameter of the GitLab Jobs API).

If `fetch_group_variables` is true (defaults to false), then Group-level CI/CD variables will be retrieved for each available / specified group. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Group-level CI/CD variables.

If `fetch_project_variables` is true (defaults to false), then Project-level CI/CD variables will be retrieved for each available / specified project. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Project-level CI/CD variables.
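For context on what the new README option does at the API level: `fetch_retried_jobs` maps onto the `include_retried` query parameter of GitLab's pipeline jobs endpoint (`GET /projects/:id/pipelines/:pipeline_id/jobs`). Below is a minimal sketch of the underlying request, using placeholder project and pipeline IDs and a placeholder token; it is for illustration only and is not part of this PR.

```python
import requests

# Placeholder values for illustration only.
API_URL = "https://gitlab.com/api/v4"
PROJECT_ID = 123
PIPELINE_ID = 456
HEADERS = {"Private-Token": "<your-access-token>"}

# Ask GitLab to also return jobs that were later retried.
response = requests.get(
    f"{API_URL}/projects/{PROJECT_ID}/pipelines/{PIPELINE_ID}/jobs",
    headers=HEADERS,
    params={"include_retried": "true"},
)
response.raise_for_status()
for job in response.json():
    print(job["id"], job["name"], job["status"])
```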
tap_gitlab/__init__.py (16 changes: 10 additions & 6 deletions)
@@ -24,6 +24,7 @@
'ultimate_license': False,
'fetch_merge_request_commits': False,
'fetch_pipelines_extended': False,
'fetch_retried_jobs': False,
'fetch_group_variables': False,
'fetch_project_variables': False,
}
@@ -71,7 +72,7 @@ def load_schema(entity):
'replication_keys': ['updated_at'],
},
'jobs': {
'url': '/projects/{id}/pipelines/{secondary_id}/jobs',
'url': '/projects/{id}/pipelines/{secondary_id}/jobs?include_retried={fetch_retried_jobs}',
'schema': load_schema('jobs'),
'key_properties': ['id'],
'replication_method': 'FULL_TABLE',
@@ -229,7 +230,7 @@ class ResourceInaccessible(Exception):
def truthy(val) -> bool:
return str(val).lower() in TRUTHY

def get_url(entity, id, secondary_id=None, start_date=None):
def get_url(entity, id, secondary_id=None, start_date=None, fetch_retried_jobs=False):
if not isinstance(id, int):
id = id.replace("/", "%2F")

@@ -239,7 +240,8 @@ def get_url(entity, id, secondary_id=None, start_date=None):
return CONFIG['api_url'] + RESOURCES[entity]['url'].format(
id=id,
secondary_id=secondary_id,
start_date=start_date
start_date=start_date,
fetch_retried_jobs=fetch_retried_jobs,
)


@@ -250,8 +252,8 @@ def get_start(entity):

@backoff.on_predicate(backoff.runtime,
predicate=lambda r: r.status_code == 429,
max_tries=5,
value=lambda r: int(r.headers.get("Retry-After")),
max_tries=5,
value=lambda r: int(r.headers.get("Retry-After")),
jitter=None)
@backoff.on_exception(backoff.expo,
(requests.exceptions.RequestException),
@@ -770,8 +772,9 @@ def sync_jobs(project, pipeline):
if stream is None or not stream.is_selected():
return
mdata = metadata.to_map(stream.metadata)
fetch_retried_jobs = CONFIG['fetch_retried_jobs']

url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id'])
url = get_url(entity=entity, id=project['id'], secondary_id=pipeline['id'], fetch_retried_jobs=fetch_retried_jobs)
with Transformer(pre_hook=format_timestamp) as transformer:
for row in gen_request(url):
row['project_id'] = project['id']
Expand Down Expand Up @@ -932,6 +935,7 @@ def main_impl():
CONFIG['ultimate_license'] = truthy(CONFIG['ultimate_license'])
CONFIG['fetch_merge_request_commits'] = truthy(CONFIG['fetch_merge_request_commits'])
CONFIG['fetch_pipelines_extended'] = truthy(CONFIG['fetch_pipelines_extended'])
CONFIG['fetch_retried_jobs'] = truthy(CONFIG['fetch_retried_jobs'])
CONFIG['fetch_group_variables'] = truthy(CONFIG['fetch_group_variables'])
CONFIG['fetch_project_variables'] = truthy(CONFIG['fetch_project_variables'])

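To summarize how the change fits together: the raw config value is coerced with `truthy`, stored in `CONFIG['fetch_retried_jobs']`, and passed by `sync_jobs` into `get_url`, which formats it into the jobs resource URL. The following is a standalone sketch of that flow, using simplified stand-ins for the tap's `CONFIG` and `RESOURCES` structures and an assumed `TRUTHY` tuple (the real definitions live elsewhere in `tap_gitlab/__init__.py`).

```python
# Assumed value for this sketch; the tap defines its own TRUTHY tuple.
TRUTHY = ("true", "1", "yes", "on")

def truthy(val) -> bool:
    return str(val).lower() in TRUTHY

# Simplified stand-in for the tap's CONFIG mapping.
CONFIG = {
    "api_url": "https://gitlab.com/api/v4",
    "fetch_retried_jobs": "true",  # raw value as read from config.json
}
CONFIG["fetch_retried_jobs"] = truthy(CONFIG["fetch_retried_jobs"])

# Jobs resource URL template as changed in this PR.
JOBS_URL = "/projects/{id}/pipelines/{secondary_id}/jobs?include_retried={fetch_retried_jobs}"

def get_url(url_template, id, secondary_id=None, fetch_retried_jobs=False):
    # Mirrors the diff: the flag is formatted straight into the URL template.
    return CONFIG["api_url"] + url_template.format(
        id=id,
        secondary_id=secondary_id,
        fetch_retried_jobs=fetch_retried_jobs,
    )

print(get_url(JOBS_URL, 123, secondary_id=456,
              fetch_retried_jobs=CONFIG["fetch_retried_jobs"]))
# -> https://gitlab.com/api/v4/projects/123/pipelines/456/jobs?include_retried=True
```

Note that Python's `str.format` renders the boolean as `True`/`False` in the query string, which is exactly what the tap's template produces.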