Skip to content

Commit

Permalink
Merge pull request #2630 from danswer-ai/hotfix/v0.6-jira-limit-size
Browse files Browse the repository at this point in the history
Hotfix/v0.6 jira limit size
  • Loading branch information
rkuo-danswer authored Sep 30, 2024
2 parents 3cafedc + 3a8f06c commit c28a8d8
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 2 deletions.
4 changes: 4 additions & 0 deletions backend/danswer/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@
for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
if ignored_tag
]
# Maximum size for Jira tickets in bytes (default: 100KB).
# `or` is used instead of a `.get()` default so that a variable which is set
# but empty (e.g. forwarded as `VAR=` by a compose file) falls back to the
# default rather than crashing at import time on `int("")`.
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE") or 100 * 1024
)

GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")

Expand Down
13 changes: 11 additions & 2 deletions backend/danswer/connectors/danswer_jira/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
Expand Down Expand Up @@ -134,10 +135,18 @@ def fetch_jira_issues_batch(
else extract_text_from_adf(jira.raw["fields"]["description"])
)
comments = _get_comment_strs(jira, comment_email_blacklist)
semantic_rep = f"{description}\n" + "\n".join(
ticket_content = f"{description}\n" + "\n".join(
[f"Comment: {comment}" for comment in comments if comment]
)

# Check ticket size
if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
logger.info(
f"Skipping {jira.key} because it exceeds the maximum size of "
f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
)
continue

page_url = f"{jira_client.client_info()}/browse/{jira.key}"

people = set()
Expand Down Expand Up @@ -180,7 +189,7 @@ def fetch_jira_issues_batch(
doc_batch.append(
Document(
id=page_url,
sections=[Section(link=page_url, text=semantic_rep)],
sections=[Section(link=page_url, text=ticket_content)],
source=DocumentSource.JIRA,
semantic_identifier=jira.fields.summary,
doc_updated_at=time_str_to_utc(jira.fields.updated),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from collections.abc import Callable
from collections.abc import Generator
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from jira.resources import Issue
from pytest_mock import MockFixture

from danswer.connectors.danswer_jira.connector import fetch_jira_issues_batch


@pytest.fixture
def mock_jira_client() -> MagicMock:
    """Provide a fresh ``MagicMock`` to pass wherever a Jira client is expected."""
    client = MagicMock()
    return client


@pytest.fixture
def mock_issue_small() -> MagicMock:
    """Build a mocked issue keyed ``SMALL-1`` with a short description and two short comments."""
    issue = MagicMock()
    issue.key = "SMALL-1"

    # MagicMock hands back the same child on every attribute access, so writing
    # through this alias is the same as writing through ``issue.fields``.
    fields = issue.fields
    fields.summary = "Small Issue"
    fields.description = "Small description"
    fields.updated = "2023-01-01T00:00:00+0000"
    fields.labels = []

    fields.creator.displayName = "John Doe"
    fields.creator.emailAddress = "[email protected]"

    fields.comment.comments = [
        MagicMock(body=text) for text in ("Small comment 1", "Small comment 2")
    ]
    return issue


@pytest.fixture
def mock_issue_large() -> MagicMock:
    """Build a mocked issue keyed ``LARGE-1`` whose text exceeds 100KB.

    The description alone is 99,000 characters; the two repeated-phrase
    comments add 14,000 and 22,000 more, which pushes the total past
    100 * 1024 bytes.
    """
    issue = MagicMock()
    issue.key = "LARGE-1"

    # MagicMock hands back the same child on every attribute access, so writing
    # through this alias is the same as writing through ``issue.fields``.
    fields = issue.fields
    fields.summary = "Large Issue"
    fields.description = "a" * 99_000
    fields.updated = "2023-01-02T00:00:00+0000"
    fields.labels = []

    fields.creator.displayName = "Jane Doe"
    fields.creator.emailAddress = "[email protected]"

    fields.comment.comments = [
        MagicMock(body=phrase * 1000)
        for phrase in ("Large comment ", "Another large comment ")
    ]
    return issue


@pytest.fixture
def patched_type() -> Callable[[Any], type]:
    """Return a stand-in for ``type`` that reports every ``MagicMock`` as ``Issue``.

    Any object that is not a ``MagicMock`` still gets its real type.
    """

    def _patched_type(obj: Any) -> type:
        return Issue if isinstance(obj, MagicMock) else type(obj)

    return _patched_type


@pytest.fixture
def mock_jira_api_version() -> Generator[Any, Any, Any]:
    """Replace the connector module's ``JIRA_API_VERSION`` with ``"2"`` while a test runs."""
    target = "danswer.connectors.danswer_jira.connector.JIRA_API_VERSION"
    with patch(target, "2"):
        yield


@pytest.fixture
def patched_environment(
    patched_type: Callable[[Any], type],
    mock_jira_api_version: None,
) -> Generator[None, None, None]:
    """Activate every patch the connector tests need for the duration of a test.

    Args:
        patched_type: Replacement for ``type`` that is installed into the
            connector module's namespace.
        mock_jira_api_version: Requested only for its side effect of patching
            ``JIRA_API_VERSION``; that fixture yields nothing, so the value
            is ``None``.

    Yields:
        Nothing; the patch is active while the test body runs.
    """
    # The connector module does not define `type` itself; `patch` creates the
    # attribute automatically when the target name is a builtin.
    with patch("danswer.connectors.danswer_jira.connector.type", patched_type):
        yield


def test_fetch_jira_issues_batch_small_ticket(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    patched_environment: MockFixture,
) -> None:
    """A small ticket yields one document containing its description and comments."""
    mock_jira_client.search_issues.return_value = [mock_issue_small]

    documents, total = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert total == 1
    assert len(documents) == 1

    document = documents[0]
    assert document.id.endswith("/SMALL-1")

    body = document.sections[0].text
    for expected in ("Small description", "Small comment 1", "Small comment 2"):
        assert expected in body


# Pin the limit to the documented 100KB default so the outcome does not depend
# on a JIRA_CONNECTOR_MAX_TICKET_SIZE override in the environment running the
# tests.
@patch(
    "danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE",
    100 * 1024,
)
def test_fetch_jira_issues_batch_large_ticket(
    mock_jira_client: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,
) -> None:
    """A ticket larger than the size limit is counted but produces no document."""
    mock_jira_client.search_issues.return_value = [mock_issue_large]

    docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert count == 1
    assert len(docs) == 0  # The large ticket should be skipped


# Pin the limit to the documented 100KB default so the outcome does not depend
# on a JIRA_CONNECTOR_MAX_TICKET_SIZE override in the environment running the
# tests.
@patch(
    "danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE",
    100 * 1024,
)
def test_fetch_jira_issues_batch_mixed_tickets(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,
) -> None:
    """With one small and one oversized ticket, both are counted but only the small one is kept."""
    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]

    docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert count == 2
    assert len(docs) == 1  # Only the small ticket should be included
    assert docs[0].id.endswith("/SMALL-1")


@patch("danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE", 50)
def test_fetch_jira_issues_batch_custom_size_limit(
    mock_jira_client: MagicMock,
    mock_issue_small: MagicMock,
    mock_issue_large: MagicMock,
    patched_environment: MockFixture,
) -> None:
    """With the limit patched down to 50 bytes, both tickets are counted and both are dropped."""
    tickets = [mock_issue_small, mock_issue_large]
    mock_jira_client.search_issues.return_value = tickets

    documents, total = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert total == 2
    # Both tickets should be skipped due to the low size limit
    assert len(documents) == 0

0 comments on commit c28a8d8

Please sign in to comment.