Files
SneakyCode/tests/unit/test_truncation.py
Phillip Tarrant 76ba490aa2 Add Phase 7: polish and hardening — retry, truncation, sessions, shutdown
- Config extensions: retry backoff, truncation threshold, session persistence
- LLM retry with exponential backoff + jitter on transient errors (5xx, connection)
- Conversation truncation: drops oldest messages preserving first user + recent N
- Session persistence: auto-save/restore with atomic writes, cleanup of old files
- Graceful shutdown: SIGTERM handler, cancel() on AgentLoop, save-on-exit
- Partial message recovery on mid-stream interruption
- New slash commands: /save, /session
- 18 new tests (5 retry, 5 truncation, 4 session, 4 integration workflows)
- README.md and docs/tools.md documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 10:20:16 -05:00

129 lines
4.7 KiB
Python

"""Unit tests for conversation truncation logic."""
from pathlib import Path
import pytest
from app.agent.context import SessionContext
from app.models.config import AgentConfig, AppConfig, LLMConfig
@pytest.fixture
def config() -> AppConfig:
    """Build the application config shared by the truncation tests.

    The conversation budget is kept deliberately small (200 tokens) so a
    test can go over budget with only a handful of messages.  The
    truncation settings are ``truncation_keep_recent=3`` and
    ``truncation_threshold=0.85``.
    """
    llm_settings = LLMConfig(model="test-model", endpoint="http://localhost:11434")
    agent_settings = AgentConfig(
        max_conversation_tokens=200,
        truncation_keep_recent=3,
        truncation_threshold=0.85,
        workspace_root=Path("/tmp/test"),
    )
    return AppConfig(llm=llm_settings, agent=agent_settings)
@pytest.fixture
def ctx(config: AppConfig) -> SessionContext:
    """Provide a new ``SessionContext`` constructed from the ``config`` fixture."""
    session = SessionContext(config)
    return session
class TestTruncation:
    """Tests for ``SessionContext.truncate_history``.

    Each test fills the context with messages, optionally reports a
    200-token usage to the context's token counter, calls
    ``truncate_history()`` and inspects ``get_history()``.
    """

    # Size of the "recent" tail checked below; must stay equal to
    # ``truncation_keep_recent`` in the ``config`` fixture.
    KEEP_RECENT = 3

    @staticmethod
    def _force_over_budget(ctx: SessionContext) -> None:
        """Make the token counter report a usage of 200 tokens.

        200 is the ``max_conversation_tokens`` value used by the ``config``
        fixture; the tests rely on this to put the context over budget
        before calling ``truncate_history()``.
        """
        # Function-scope import, as in the original tests, so this module's
        # top-level import block does not need to change.
        from app.utils.token_counter import TokenUsage

        ctx.token_counter.count_usage(TokenUsage(total_tokens=200))

    def test_no_truncation_under_threshold(self, ctx: SessionContext) -> None:
        """No messages dropped when under threshold."""
        ctx.add_message("user", "Hello")
        ctx.add_message("assistant", "Hi there!")

        dropped = ctx.truncate_history()

        assert dropped == 0
        assert ctx.message_count == 2

    def test_drops_oldest_messages(self, ctx: SessionContext) -> None:
        """Drops middle messages when over budget."""
        # Fill with enough content to exceed the small 200-token budget
        ctx.add_message("user", "First message " * 20)
        for i in range(8):
            ctx.add_message("assistant", f"Response {i} " * 15)
            ctx.add_message("user", f"Follow-up {i} " * 15)
        self._force_over_budget(ctx)

        original_count = len(ctx.get_history())
        dropped = ctx.truncate_history()

        assert dropped > 0
        assert len(ctx.get_history()) < original_count

    def test_preserves_recent_messages(self, ctx: SessionContext) -> None:
        """The most recent N messages are always preserved."""
        ctx.add_message("user", "First message " * 20)
        for i in range(10):
            ctx.add_message("assistant", f"Response {i} " * 10)
            ctx.add_message("user", f"Follow-up {i} " * 10)
        self._force_over_budget(ctx)

        # Snapshot the tail *contents* before truncating so the comparison
        # cannot be affected by anything truncation does to the messages.
        recent_before = [m.content for m in ctx.get_history()[-self.KEEP_RECENT:]]
        assert len(recent_before) == self.KEEP_RECENT

        ctx.truncate_history()

        recent_after = [m.content for m in ctx.get_history()[-self.KEEP_RECENT:]]
        # Whole-list equality also checks the length.  The previous
        # ``zip()`` loop stopped at the shorter list, so it passed
        # vacuously whenever fewer than KEEP_RECENT messages survived.
        assert recent_after == recent_before

    def test_preserves_first_user_message(self, ctx: SessionContext) -> None:
        """First user message is always kept."""
        first_content = "This is the very first user message"
        ctx.add_message("user", first_content)
        for i in range(10):
            ctx.add_message("assistant", f"Response {i} " * 10)
            ctx.add_message("user", f"Follow-up {i} " * 10)
        self._force_over_budget(ctx)

        ctx.truncate_history()

        history = ctx.get_history()
        assert history[0].role == "user"
        assert history[0].content == first_content

    def test_orphaned_tool_messages_cleaned(self, ctx: SessionContext) -> None:
        """Tool messages without matching tool_call are cleaned up."""
        from app.models.tool_call import ToolCall, ToolCallFunction

        ctx.add_message("user", "Do something " * 20)
        # Assistant with tool call
        ctx.add_message(
            "assistant",
            None,
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    type="function",
                    function=ToolCallFunction(
                        name="read_file",
                        arguments='{"path": "x"}',
                    ),
                )
            ],
        )
        # Tool result for tc_1
        ctx.add_message(
            "tool",
            "file contents " * 20,
            tool_call_id="tc_1",
            name="read_file",
        )
        # More padding to push over budget
        for i in range(8):
            ctx.add_message("assistant", f"Analysis {i} " * 15)
            ctx.add_message("user", f"Next {i} " * 15)
        self._force_over_budget(ctx)

        ctx.truncate_history()

        history = ctx.get_history()
        # If the assistant message with tc_1 was dropped, the orphaned tool
        # message should also be gone
        has_tc1_assistant = any(
            m.role == "assistant"
            and m.tool_calls
            and any(tc.id == "tc_1" for tc in m.tool_calls)
            for m in history
        )
        has_tc1_tool = any(
            m.role == "tool" and m.tool_call_id == "tc_1" for m in history
        )
        # Either both exist or neither exists
        assert has_tc1_assistant == has_tc1_tool