- Config extensions: retry backoff, truncation threshold, session persistence
- LLM retry with exponential backoff + jitter on transient errors (5xx, connection)
- Conversation truncation: drops oldest messages, preserving first user + recent N
- Session persistence: auto-save/restore with atomic writes, cleanup of old files
- Graceful shutdown: SIGTERM handler, cancel() on AgentLoop, save-on-exit
- Partial message recovery on mid-stream interruption
- New slash commands: /save, /session
- 18 new tests (5 retry, 5 truncation, 4 session, 4 integration workflows)
- README.md and docs/tools.md documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
129 lines
4.7 KiB
Python
129 lines
4.7 KiB
Python
"""Unit tests for conversation truncation logic."""
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from app.agent.context import SessionContext
|
|
from app.models.config import AgentConfig, AppConfig, LLMConfig
|
|
|
|
|
|
@pytest.fixture
def config() -> AppConfig:
    """Build an ``AppConfig`` with a deliberately tiny conversation budget.

    The agent section uses a 200-token conversation limit, keeps the 3 most
    recent messages on truncation, and sets the truncation threshold to 0.85,
    so the tests can reach the truncation path with very little history.
    """
    llm_settings = LLMConfig(model="test-model", endpoint="http://localhost:11434")
    agent_settings = AgentConfig(
        max_conversation_tokens=200,
        truncation_keep_recent=3,
        truncation_threshold=0.85,
        workspace_root=Path("/tmp/test"),
    )
    return AppConfig(llm=llm_settings, agent=agent_settings)
|
|
|
|
|
|
@pytest.fixture
def ctx(config: AppConfig) -> SessionContext:
    """Return a new ``SessionContext`` constructed from the ``config`` fixture."""
    session = SessionContext(config)
    return session
|
|
|
|
|
|
class TestTruncation:
    """Tests for ``SessionContext.truncate_history``.

    Each test fills the context with messages, optionally reports a token
    usage equal to the configured budget, calls ``truncate_history()``, and
    then inspects the remaining history.
    """

    @staticmethod
    def _force_over_budget(ctx: SessionContext) -> None:
        """Force the token counter to report over budget.

        Records a usage of 200 total tokens, which equals the
        ``max_conversation_tokens`` value set by the ``config`` fixture.
        Not collected by pytest because the name lacks the ``test_`` prefix.
        """
        # Kept as a function-scope import, as in the original tests, so the
        # module-level import block does not need to change.
        from app.utils.token_counter import TokenUsage

        ctx.token_counter.count_usage(TokenUsage(total_tokens=200))

    def test_no_truncation_under_threshold(self, ctx: SessionContext) -> None:
        """No messages dropped when under threshold."""
        ctx.add_message("user", "Hello")
        ctx.add_message("assistant", "Hi there!")

        dropped = ctx.truncate_history()

        assert dropped == 0
        assert ctx.message_count == 2

    def test_drops_oldest_messages(self, ctx: SessionContext) -> None:
        """Drops middle messages when over budget."""
        # Fill with enough content to exceed the small 200-token budget
        ctx.add_message("user", "First message " * 20)
        for i in range(8):
            ctx.add_message("assistant", f"Response {i} " * 15)
            ctx.add_message("user", f"Follow-up {i} " * 15)

        self._force_over_budget(ctx)

        original_count = len(ctx.get_history())
        dropped = ctx.truncate_history()

        assert dropped > 0
        assert len(ctx.get_history()) < original_count

    def test_preserves_recent_messages(self, ctx: SessionContext) -> None:
        """The most recent N messages are always preserved."""
        ctx.add_message("user", "First message " * 20)
        for i in range(10):
            ctx.add_message("assistant", f"Response {i} " * 10)
            ctx.add_message("user", f"Follow-up {i} " * 10)

        self._force_over_budget(ctx)

        # keep_recent=3 in the config fixture
        recent_before = [m.content for m in ctx.get_history()[-3:]]

        ctx.truncate_history()

        recent_after = [m.content for m in ctx.get_history()[-3:]]

        # Compare whole lists rather than zipping the two tails: zip() stops
        # at the shorter input, so a history that shrank below three messages
        # (or to nothing) would otherwise pass without checking anything.
        assert recent_after == recent_before

    def test_preserves_first_user_message(self, ctx: SessionContext) -> None:
        """First user message is always kept."""
        first_content = "This is the very first user message"
        ctx.add_message("user", first_content)
        for i in range(10):
            ctx.add_message("assistant", f"Response {i} " * 10)
            ctx.add_message("user", f"Follow-up {i} " * 10)

        self._force_over_budget(ctx)

        ctx.truncate_history()

        history = ctx.get_history()
        assert history[0].role == "user"
        assert history[0].content == first_content

    def test_orphaned_tool_messages_cleaned(self, ctx: SessionContext) -> None:
        """Tool messages without matching tool_call are cleaned up."""
        from app.models.tool_call import ToolCall, ToolCallFunction

        ctx.add_message("user", "Do something " * 20)
        # Assistant with tool call
        ctx.add_message(
            "assistant",
            None,
            tool_calls=[
                ToolCall(
                    id="tc_1",
                    type="function",
                    function=ToolCallFunction(
                        name="read_file",
                        arguments='{"path": "x"}',
                    ),
                )
            ],
        )
        # Tool result for tc_1
        ctx.add_message("tool", "file contents " * 20, tool_call_id="tc_1", name="read_file")
        # More padding to push over budget
        for i in range(8):
            ctx.add_message("assistant", f"Analysis {i} " * 15)
            ctx.add_message("user", f"Next {i} " * 15)

        self._force_over_budget(ctx)

        ctx.truncate_history()

        history = ctx.get_history()
        # If the assistant message with tc_1 was dropped, the orphaned tool
        # message should also be gone
        has_tc1_assistant = any(
            m.role == "assistant"
            and m.tool_calls
            and any(tc.id == "tc_1" for tc in m.tool_calls)
            for m in history
        )
        has_tc1_tool = any(
            m.role == "tool" and m.tool_call_id == "tc_1" for m in history
        )

        # Either both exist or neither exists
        assert has_tc1_assistant == has_tc1_tool
|