From 2a5312f5b4ad25d8fb453fb0eabc5d718d3d7af2 Mon Sep 17 00:00:00 2001 From: Julia Schessner Date: Wed, 15 Jan 2025 15:32:40 +0100 Subject: [PATCH] truncation tests --- alphastats/llm/llm_integration.py | 2 +- tests/llm/test_llm_integration.py | 48 ++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/alphastats/llm/llm_integration.py b/alphastats/llm/llm_integration.py index b09603e0..55200c76 100644 --- a/alphastats/llm/llm_integration.py +++ b/alphastats/llm/llm_integration.py @@ -254,7 +254,7 @@ def _truncate_conversation_history( warnings.warn( f"Removing corresponsing tool output as well.\nRemoved message:{removed_toolmessage[MessageKeys.ROLE]}: {removed_toolmessage[MessageKeys.CONTENT][0:30]}..." ) - if len(self._messages) == 0: + if len(self._messages) == oldest_not_pinned: raise ValueError( "Truncating conversation history failed, as the artifact from the last call exceeds the token limit. Please increase the token limit and reset the LLM analysis." 
) diff --git a/tests/llm/test_llm_integration.py b/tests/llm/test_llm_integration.py index 7bc654c4..8f6ed9aa 100644 --- a/tests/llm/test_llm_integration.py +++ b/tests/llm/test_llm_integration.py @@ -180,7 +180,7 @@ def test_append_message_with_tool_calls(llm_integration): (10, 20, 100, 5), # Should truncate to 5 messages ], ) -def test_truncate_conversation_history( +def test_truncate_conversation_history_success( llm_integration, num_messages, message_length, max_tokens, expected_messages ): """Test conversation history truncation with different scenarios""" @@ -197,6 +197,52 @@ def test_truncate_conversation_history( assert llm_integration._messages[0]["role"] == "system" +def test_truncate_conversation_history_pinned_too_large(llm_integration): + """Test conversation history truncation with pinned messages that exceed the token limit""" + # Add multiple messages + message_content = "Test " * 100 + llm_integration._max_tokens = 200 + llm_integration._append_message("user", message_content.strip(), pin_message=True) + llm_integration._append_message("user", message_content.strip(), pin_message=False) + with pytest.raises(ValueError, match=r".*all remaining messages are pinned.*"): + llm_integration._append_message( + "assistant", message_content.strip(), pin_message=True + ) + + +def test_truncate_conversation_history_tool_output_popped(llm_integration): + message_content = "Test " * 50 + llm_integration._max_tokens = 120 + # removal of assistant would suffice for total tokens, but tool output should be dropped as well + llm_integration._append_message("assistant", message_content.strip()) + llm_integration._append_message("tool", message_content.strip()) + with pytest.warns( + UserWarning, match=r".*Truncating conversation history.*" + ), pytest.warns(UserWarning, match=r".*Removing corresponsing tool.*"): + llm_integration._append_message("user", message_content.strip()) + + assert len(llm_integration._messages) == 2 + assert 
llm_integration._messages[0]["role"] == "system" + assert llm_integration._messages[1]["role"] == "user" + + +def test_truncate_conversation_history_last_tool_output_error(llm_integration): + message_content = "Test " * 50 + llm_integration._max_tokens = 100 + # removal of assistant would suffice for total tokens, but tool output should be dropped as well + llm_integration._append_message("assistant", message_content.strip()) + with pytest.raises(ValueError, match=r".*last call exceeds the token limit.*"): + llm_integration._append_message("tool", message_content.strip()) + + +def test_truncate_conversation_history_single_large_message(llm_integration): + llm_integration._max_tokens = 1 + with pytest.raises( + ValueError, match=r".*only remaining message exceeds the token limit.*" + ): + llm_integration._truncate_conversation_history() + + + def test_estimate_tokens_gpt(llm_integration): """Test token estimation for a given message""" message_content = "Test message"