From a21104d9c2562e07c6eabb7acb17b0d355b40cf0 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Wed, 20 Nov 2024 16:00:14 -0800 Subject: [PATCH] decreased confluence retry times and added more logging (#3184) * decreased confluence retry times and added more logging * added check on connector startup * no retries! * fr no retries --- .../connectors/confluence/connector.py | 17 +++++++++++++++ .../connectors/confluence/onyx_confluence.py | 6 +++++- .../danswer/connectors/confluence/utils.py | 2 +- .../confluence/group_sync.py | 21 +++++++++++++++++-- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 5c58cf194fa..8d614c163c7 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -3,6 +3,8 @@ from typing import Any from urllib.parse import quote +from atlassian import Confluence # type: ignore + from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE @@ -116,6 +118,21 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None is_cloud=self.is_cloud, wiki_base=self.wiki_base, ) + + client_without_retries = Confluence( + api_version="cloud" if self.is_cloud else "latest", + url=self.wiki_base.rstrip("/"), + username=credentials["confluence_username"] if self.is_cloud else None, + password=credentials["confluence_access_token"] if self.is_cloud else None, + token=credentials["confluence_access_token"] if not self.is_cloud else None, + ) + spaces = client_without_retries.get_all_spaces(limit=1) + if not spaces: + raise RuntimeError( + f"No spaces found at {self.wiki_base}! " + "Check your credentials and wiki_base and make sure " + "is_cloud is set correctly." + ) return None def _get_comment_string_for_page_id(self, page_id: str) -> str: diff --git a/backend/danswer/connectors/confluence/onyx_confluence.py b/backend/danswer/connectors/confluence/onyx_confluence.py index c6a4d3857b1..4820429ba0f 100644 --- a/backend/danswer/connectors/confluence/onyx_confluence.py +++ b/backend/danswer/connectors/confluence/onyx_confluence.py @@ -84,7 +84,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F: def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: MAX_RETRIES = 5 - TIMEOUT = 3600 + TIMEOUT = 600 timeout_at = time.monotonic() + TIMEOUT for attempt in range(MAX_RETRIES): @@ -99,6 +99,10 @@ def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: return confluence_call(*args, **kwargs) except HTTPError as e: delay_until = _handle_http_error(e, attempt) + logger.warning( + f"HTTPError in confluence call. " + f"Retrying in {delay_until} seconds..." + ) while time.monotonic() < delay_until: # in the future, check a signal here to exit time.sleep(1) diff --git a/backend/danswer/connectors/confluence/utils.py b/backend/danswer/connectors/confluence/utils.py index 9deff36b269..cb5253f4c14 100644 --- a/backend/danswer/connectors/confluence/utils.py +++ b/backend/danswer/connectors/confluence/utils.py @@ -283,6 +283,6 @@ def build_confluence_client( password=credentials_json["confluence_access_token"] if is_cloud else None, token=credentials_json["confluence_access_token"] if not is_cloud else None, backoff_and_retry=True, - max_backoff_retries=60, + max_backoff_retries=10, max_backoff_seconds=60, ) diff --git a/backend/ee/danswer/external_permissions/confluence/group_sync.py b/backend/ee/danswer/external_permissions/confluence/group_sync.py index fab2bde4ea0..17140b33f71 100644 --- a/backend/ee/danswer/external_permissions/confluence/group_sync.py +++ b/backend/ee/danswer/external_permissions/confluence/group_sync.py @@ -1,3 +1,5 @@ +from atlassian import Confluence # type: ignore + from danswer.connectors.confluence.onyx_confluence import OnyxConfluence from danswer.connectors.confluence.utils import build_confluence_client from danswer.connectors.confluence.utils import get_user_email_from_username__server @@ -32,11 +34,26 @@ def _get_group_members_email_paginated( def confluence_group_sync( cc_pair: ConnectorCredentialPair, ) -> list[ExternalUserGroup]: + credentials = cc_pair.credential.credential_json is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False) + wiki_base = cc_pair.connector.connector_specific_config["wiki_base"] + + # test connection with direct client, no retries + confluence_client = Confluence( + api_version="cloud" if is_cloud else "latest", + url=wiki_base.rstrip("/"), + username=credentials["confluence_username"] if is_cloud else None, + password=credentials["confluence_access_token"] if is_cloud else None, + token=credentials["confluence_access_token"] if not is_cloud else None, + ) + spaces = confluence_client.get_all_spaces(limit=1) + if not spaces: + raise RuntimeError(f"No spaces found at {wiki_base}!") + confluence_client = build_confluence_client( - credentials_json=cc_pair.credential.credential_json, + credentials_json=credentials, is_cloud=is_cloud, - wiki_base=cc_pair.connector.connector_specific_config["wiki_base"], + wiki_base=wiki_base, ) # Get all group names