mitodl · arslanashraf7 · May 7, 2025 · Apr 30, 2025 · May 2, 2025 · arslanashraf7
diff --git a/docker-compose.services.yml b/docker-compose.services.yml
@@ -101,6 +101,7 @@ services:
     environment:
       - KEYCLOAK_ADMIN=${KEYCLOAK_SVC_ADMIN:-admin}
       - KEYCLOAK_ADMIN_PASSWORD=${KEYCLOAK_SVC_ADMIN_PASSWORD:-admin}
+      - "_JAVA_OPTIONS=${JAVA_OPTIONS:-}" # Load _JAVA_OPTIONS from env, fallback to empty string
     networks:
       default:
         aliases:

diff --git a/learning_resources/content_summarizer.py b/learning_resources/content_summarizer.py
@@ -7,6 +7,10 @@
 from langchain_community.chat_models import ChatLiteLLM
 from typing_extensions import TypedDict
 
+from learning_resources.exceptions import (
+    FlashcardsGenerationError,
+    SummaryGenerationError,
+)
 from learning_resources.models import (
     ContentFile,
     ContentSummarizerConfiguration,
@@ -124,31 +128,36 @@ def get_unprocessed_content_file_ids(
 
     def summarize_content_files_by_ids(
         self, content_file_ids: list[int], overwrite
-    ) -> None:
+    ) -> list[str]:
         """Process multiple content files by id.
 
         Args:
             - ids (list[int]): List of content file ids to process
             - overwrite (bool): Whether to overwrite existing summary and flashcards
 
         Returns:
-            - None
+            - list[str]: List of status messages for each content file
         """
+        status_messages = []
         for content_file_id in content_file_ids:
-            self.summarize_single_content_file(content_file_id, overwrite=overwrite)
+            status_msg = self.summarize_single_content_file(
+                content_file_id, overwrite=overwrite
+            )
+            status_messages.append(status_msg)
+        return status_messages
 
     def summarize_single_content_file(
         self,
         content_file_id: int,
         overwrite,
-    ) -> None:
+    ) -> str:
         """Process a single content file
         Args:
             - content_file_id (int): Id of the content file to process
             - overwrite (bool): Whether to overwrite existing summary and flashcards
 
         Returns:
-            - None
+            - str: A string message indicating the status of the summarization
         """
         try:
             with transaction.atomic():
@@ -175,10 +184,24 @@ def summarize_single_content_file(
 
                     if updated:
                         content_file.save()
-
-        except Exception:
+                    return f"Summarization succeeded for CONTENT_FILE_ID: {content_file_id}"  # noqa: E501
+                return f"Summarization skipped for CONTENT_FILE_ID: {content_file_id}"
+        except SummaryGenerationError as exc:
+            # Log and return a specific readable error message when summary
+            # generation fails.
+            logger.exception("Error processing content: %d", content_file.id)
+            return f"Summary generation failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n"  # noqa: E501
+        except FlashcardsGenerationError as exc:
+            # Log and return a specific readable error message when flashcards
+            # generation fails.
+            return f"Flashcards generation failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n"  # noqa: E501
+        except Exception as exc:
+            # Log and return a specific readable error message when an  unknown
+            # error occurs.
             logger.exception("Error processing content: %d", content_file.id)
-            raise
+            return (
+                f"Summarization failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n",  # noqa: E501
+            )
 
     def _get_llm(self, model=None, temperature=0.0, max_tokens=1000) -> ChatLiteLLM:
         """Get the ChatLiteLLM instance"""
@@ -216,11 +239,15 @@ def _generate_summary(self, content: str, llm_model: str) -> str:
             generated_summary = response.content
             logger.info("Generated summary: %s", generated_summary)
 
-        except Exception:
+        except Exception as exc:
+            # We do not want to raise the exception as is, we will log the exception and
+            # raise SummaryGenerationError that will be used to make further decisions
+            # in the code.
             logger.exception(
                 "An error occurred while generating summary using model: %s", llm_model
             )
-            raise
+            raise SummaryGenerationError(exc) from exc
+
         else:
             return generated_summary
 
@@ -243,12 +270,14 @@ def _generate_flashcards(
             )
             generated_flashcards = response.get("flashcards")
             logger.info("Generated flashcards: %s", generated_flashcards)
-
-        except Exception:
+        except Exception as exc:
+            # We do not want to raise the exception as is, we will log the exception and
+            # raise FlashcardsGenerationError that will be used to make further
+            # decisions in the code.
             logger.exception(
                 "An error occurred while generating flashcards using model: %s",
                 llm_model,
             )
-            raise
+            raise FlashcardsGenerationError(exc) from exc
         else:
             return generated_flashcards
diff --git a/learning_resources/content_summarizer_test.py b/learning_resources/content_summarizer_test.py
@@ -6,6 +6,10 @@
     PlatformType,
 )
 from learning_resources.content_summarizer import ContentSummarizer
+from learning_resources.exceptions import (
+    FlashcardsGenerationError,
+    SummaryGenerationError,
+)
 from learning_resources.factories import (
     ContentFileFactory,
     ContentSummarizerConfigurationFactory,
@@ -242,9 +246,9 @@ def test_get_unprocessed_content_files_with_platform_and_config(
 def test_summarize_content_files_by_ids(
     processable_content_files, mock_summarize_single_content_file
 ):
-    """The summarizer should process content files that are processable"""
+    """The summarizer should process content files that are processable and return the status results"""
     summarizer = ContentSummarizer()
-    summarizer.summarize_content_files_by_ids(
+    results = summarizer.summarize_content_files_by_ids(
         overwrite=False,
         content_file_ids=[
             content_file.id for content_file in processable_content_files
@@ -253,6 +257,8 @@ def test_summarize_content_files_by_ids(
     assert mock_summarize_single_content_file.call_count == len(
         processable_content_files
     )
+    assert isinstance(results, list)
+    assert len(results) == len(processable_content_files)
 
 
 def test_summarize_single_content_file(mocker, processable_content_files):
@@ -332,3 +338,77 @@ def test_process_single_file_calls_llm_summary(
         assert mock_instance.with_structured_output.call_count == 1
     elif has_flashcards:
         assert mock_instance.invoke.call_count == 1
+
+
+@pytest.mark.parametrize(
+    ("process_type", "expected_exception"),
+    [("summary", SummaryGenerationError), ("flashcards", FlashcardsGenerationError)],
+)
+def test_generate_summary_flashcards_exception(
+    mocker, processable_content_files, settings, process_type, expected_exception
+):
+    """Test the exception handling in the generate_summary and generate_flashcards methods"""
+    settings.OPENAI_API_KEY = "test"
+    summarizer = ContentSummarizer()
+    content_file = processable_content_files[0]
+    content_file.save()
+
+    # Mock the ChatLiteLLM class and its methods
+    mock_chat_llm = mocker.patch(
+        "learning_resources.content_summarizer.ChatLiteLLM", autospec=True
+    )
+    mock_instance = mock_chat_llm.return_value
+
+    # Mock the response for _generate_summary to raise an exception
+    mock_instance.invoke.side_effect = Exception("Test exception")
+    # Mock the response for _generate_flashcards to raise an exception
+    mock_instance.with_structured_output.return_value.invoke.side_effect = Exception(
+        "INVALID_FORMAT"
+    )
+
+    if process_type == "summary":
+        with pytest.raises(expected_exception):
+            summarizer._generate_summary(  # noqa: SLF001
+                llm_model="llm_model", content=content_file.content
+            )
+    else:
+        with pytest.raises(expected_exception):
+            summarizer._generate_flashcards(  # noqa: SLF001
+                llm_model="llm_model", content=content_file.content
+            )
+
+
+def test_summarize_single_content_file_with_exception(
+    mocker, processable_content_files, settings
+):
+    """Test the exception handling in the summarize_single_content_file method"""
+    settings.OPENAI_API_KEY = "test"
+    summarizer = ContentSummarizer()
+    content_file = processable_content_files[0]
+
+    # Mock the ChatLiteLLM class and its methods
+    mock_chat_llm = mocker.patch(
+        "learning_resources.content_summarizer.ChatLiteLLM", autospec=True
+    )
+    mock_instance = mock_chat_llm.return_value
+
+    # Mock the response for _generate_summary to raise an exception
+    mock_instance.invoke.side_effect = Exception("Test exception")
+    # Mock the response for _generate_flashcards to raise an exception
+    mock_instance.with_structured_output.return_value.invoke.side_effect = Exception(
+        "INVALID_FORMAT"
+    )
+
+    error = summarizer.summarize_single_content_file(content_file.id, overwrite=False)
+    assert (
+        error
+        == f"Summary generation failed for CONTENT_FILE_ID: {content_file.id}\nError: Test exception\n\n"
+    )
+    content_file.summary = "Test summary"
+    content_file.save()
+    content_file.refresh_from_db()
+    error = summarizer.summarize_single_content_file(content_file.id, overwrite=False)
+    assert (
+        error
+        == f"Flashcards generation failed for CONTENT_FILE_ID: {content_file.id}\nError: INVALID_FORMAT\n\n"
+    )
diff --git a/learning_resources/exceptions.py b/learning_resources/exceptions.py
@@ -13,3 +13,11 @@ class PostHogAuthenticationError(Exception):
 
 class PostHogQueryError(Exception):
     """Raised if the PostHog query API returns a non-authentication error."""
+
+
+class SummaryGenerationError(Exception):
+    """Raised if the summary generation fails for a content file."""
+
+
+class FlashcardsGenerationError(Exception):
+    """Raised if the flashcards generation fails for a content file."""
diff --git a/learning_resources/management/commands/generate_summary_flashcards.py b/learning_resources/management/commands/generate_summary_flashcards.py
@@ -1,5 +1,7 @@
 """Management command to run the content summarizer"""
 
+import itertools
+
 from django.conf import settings
 from django.core.management import BaseCommand
 
@@ -91,9 +93,16 @@ def handle(self, *args, **options):  # noqa: ARG002
             self.stdout.write("Waiting on task...")
 
             start = now_in_utc()
-            summarizer_task.get()
+            results = summarizer_task.get()
+
+            # Log the summarization stats
+            flat_results = list(itertools.chain(*results))
+            for result in flat_results:
+                self.stdout.write(f"{result}")
 
             total_seconds = (now_in_utc() - start).total_seconds()
             self.stdout.write(
-                f"Content file summarizer finished, took {total_seconds} seconds"
+                self.style.SUCCESS(
+                    f"Content file summarizer finished, took {total_seconds} seconds"
+                )
             )
diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py
@@ -424,7 +424,7 @@ def summarize_content_files_task(
         - None
     """
     summarizer = ContentSummarizer()
-    summarizer.summarize_content_files_by_ids(content_file_ids, overwrite)
+    return summarizer.summarize_content_files_by_ids(content_file_ids, overwrite)
 
 
 @app.task(bind=True, acks_late=True)