From 2a5312f5b4ad25d8fb453fb0eabc5d718d3d7af2 Mon Sep 17 00:00:00 2001 From: Julia Schessner Date: Wed, 15 Jan 2025 15:32:40 +0100 Subject: [PATCH] truncation tests --- alphastats/llm/llm_integration.py | 2 +- tests/llm/test_llm_integration.py | 48 ++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/alphastats/llm/llm_integration.py b/alphastats/llm/llm_integration.py index b09603e0..55200c76 100644 --- a/alphastats/llm/llm_integration.py +++ b/alphastats/llm/llm_integration.py @@ -254,7 +254,7 @@ def _truncate_conversation_history( warnings.warn( f"Removing corresponsing tool output as well.\nRemoved message:{removed_toolmessage[MessageKeys.ROLE]}: {removed_toolmessage[MessageKeys.CONTENT][0:30]}..." ) - if len(self._messages) == 0: + if len(self._messages) == oldest_not_pinned: raise ValueError( "Truncating conversation history failed, as the artifact from the last call exceeds the token limit. Please increase the token limit and reset the LLM analysis." 
) diff --git a/tests/llm/test_llm_integration.py b/tests/llm/test_llm_integration.py index 7bc654c4..8f6ed9aa 100644 --- a/tests/llm/test_llm_integration.py +++ b/tests/llm/test_llm_integration.py @@ -180,7 +180,7 @@ def test_append_message_with_tool_calls(llm_integration): (10, 20, 100, 5), # Should truncate to 5 messages ], ) -def test_truncate_conversation_history( +def test_truncate_conversation_history_success( llm_integration, num_messages, message_length, max_tokens, expected_messages ): """Test conversation history truncation with different scenarios""" @@ -197,6 +197,52 @@ def test_truncate_conversation_history( assert llm_integration._messages[0]["role"] == "system" +def test_truncate_conversation_history_pinned_too_large(llm_integration): + """Test conversation history truncation with pinned messages that exceed the token limit""" + # Add multiple messages + message_content = "Test " * 100 + llm_integration._max_tokens = 200 + llm_integration._append_message("user", message_content.strip(), pin_message=True) + llm_integration._append_message("user", message_content.strip(), pin_message=False) + with pytest.raises(ValueError, match=r".*all remaining messages are pinned.*"): + llm_integration._append_message( + "assistant", message_content.strip(), pin_message=True + ) + + +def test_truncate_conversation_history_tool_output_popped(llm_integration): + message_content = "Test " * 50 + llm_integration._max_tokens = 120 + # removal of assistant would suffice for total tokens, but tool output should be dropped as well + llm_integration._append_message("assistant", message_content.strip()) + llm_integration._append_message("tool", message_content.strip()) + with pytest.warns( + UserWarning, match=r".*Truncating conversation history.*" + ), pytest.warns(UserWarning, match=r".*Removing corresponsing tool.*"): + llm_integration._append_message("user", message_content.strip()) + + assert len(llm_integration._messages) == 2 + assert 
llm_integration._messages[0]["role"] == "system" + assert llm_integration._messages[1]["role"] == "user" + + +def test_truncate_conversation_history_last_tool_output_error(llm_integration): + message_content = "Test " * 50 + llm_integration._max_tokens = 100 + # removal of assistant would suffice for total tokens, but tool output should be dropped as well + llm_integration._append_message("assistant", message_content.strip()) + with pytest.raises(ValueError, match=r".*last call exceeds the token limit.*"): + llm_integration._append_message("tool", message_content.strip()) + + +def test_truncate_conversation_history_single_large_message(llm_integration): + llm_integration._max_tokens = 1 + with pytest.raises( + ValueError, match=r".*only remaining message exceeds the token limit.*" + ): + llm_integration._truncate_conversation_history() + + + def test_estimate_tokens_gpt(llm_integration): """Test token estimation for a given message""" message_content = "Test message"