Files
SneakyCode/tests/unit/test_retry.py
Phillip Tarrant 76ba490aa2 Add Phase 7: polish and hardening — retry, truncation, sessions, shutdown
- Config extensions: retry backoff, truncation threshold, session persistence
- LLM retry with exponential backoff + jitter on transient errors (5xx, connection)
- Conversation truncation: drops oldest messages preserving first user + recent N
- Session persistence: auto-save/restore with atomic writes, cleanup of old files
- Graceful shutdown: SIGTERM handler, cancel() on AgentLoop, save-on-exit
- Partial message recovery on mid-stream interruption
- New slash commands: /save, /session
- 18 new tests (5 retry, 5 truncation, 4 session, 4 integration workflows)
- README.md and docs/tools.md documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 10:20:16 -05:00

126 lines
4.4 KiB
Python

"""Unit tests for LLM retry with exponential backoff."""
from unittest.mock import AsyncMock, patch
import pytest
from app.models.config import LLMConfig
from app.models.message import Message
from app.services.llm import LLMClient, LLMConnectionError, LLMResponseError
@pytest.fixture
def llm_config() -> LLMConfig:
    """Provide an LLMConfig with three retries and very small backoff values.

    The backoff base (0.01) and cap (0.05) are kept tiny, presumably so that
    any un-mocked backoff delay stays negligible in the test run.
    """
    settings = {
        "model": "test-model",
        "endpoint": "http://localhost:11434",
        "max_retries": 3,
        "retry_backoff_base": 0.01,
        "retry_backoff_max": 0.05,
    }
    return LLMConfig(**settings)
@pytest.fixture
def client(llm_config: LLMConfig) -> LLMClient:
    """Build the LLMClient under test from the ``llm_config`` fixture."""
    llm_client = LLMClient(llm_config)
    return llm_client
@pytest.fixture
def messages() -> list[Message]:
    """Return a minimal conversation: one user message saying "Hello"."""
    greeting = Message(role="user", content="Hello")
    return [greeting]
class TestRetry:
    """Tests for ``LLMClient.stream_chat_with_retry``.

    Every test replaces ``client.stream_chat`` with a scripted async generator
    and then drains ``stream_chat_with_retry``. Where a retry could occur,
    ``asyncio.sleep`` (as reached through ``app.services.llm``) is patched
    with an ``AsyncMock`` so backoff delays never actually run.
    """

    @pytest.mark.asyncio
    async def test_succeeds_without_retry(self, client: LLMClient, messages: list[Message]) -> None:
        """Successful stream doesn't retry."""
        call_count = 0

        async def fake_stream(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            yield {"choices": [{"delta": {"content": "Hi"}}]}

        client.stream_chat = fake_stream  # type: ignore[assignment]

        collected = [chunk async for chunk in client.stream_chat_with_retry(messages)]

        assert len(collected) == 1
        assert call_count == 1

    @pytest.mark.asyncio
    async def test_retries_on_connection_error(self, client: LLMClient, messages: list[Message]) -> None:
        """Retries on LLMConnectionError, then succeeds."""
        call_count = 0

        async def flaky_stream(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            # Fail the first two attempts; the third one streams a chunk.
            if call_count < 3:
                raise LLMConnectionError("Connection refused")
            yield {"choices": [{"delta": {"content": "OK"}}]}

        client.stream_chat = flaky_stream  # type: ignore[assignment]

        with patch("app.services.llm.asyncio.sleep", new_callable=AsyncMock):
            collected = [chunk async for chunk in client.stream_chat_with_retry(messages)]

        assert len(collected) == 1
        assert call_count == 3

    @pytest.mark.asyncio
    async def test_retries_on_5xx(self, client: LLMClient, messages: list[Message]) -> None:
        """Retries on 5xx LLMResponseError."""
        call_count = 0

        async def server_error_stream(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            # Fail only the first attempt with a 500; the second one succeeds.
            if call_count < 2:
                raise LLMResponseError("Internal Server Error", status_code=500)
            yield {"choices": [{"delta": {"content": "OK"}}]}

        client.stream_chat = server_error_stream  # type: ignore[assignment]

        with patch("app.services.llm.asyncio.sleep", new_callable=AsyncMock):
            collected = [chunk async for chunk in client.stream_chat_with_retry(messages)]

        assert len(collected) == 1
        assert call_count == 2

    @pytest.mark.asyncio
    async def test_no_retry_on_4xx(self, client: LLMClient, messages: list[Message]) -> None:
        """Does NOT retry on 4xx errors — raises immediately."""
        call_count = 0

        async def bad_request_stream(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            raise LLMResponseError("Bad Request", status_code=400)
            yield  # pragma: no cover — make this an async generator

        client.stream_chat = bad_request_stream  # type: ignore[assignment]

        with patch("app.services.llm.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
            with pytest.raises(LLMResponseError, match="Bad Request"):
                async for _ in client.stream_chat_with_retry(messages):
                    pass  # pragma: no cover

        # Merely seeing the exception is not enough: a client that retried the
        # 4xx and re-raised after exhausting its retries would also get here.
        # Pin "immediately" by requiring a single attempt and no backoff sleep.
        assert call_count == 1
        assert mock_sleep.call_count == 0

    @pytest.mark.asyncio
    async def test_respects_max_retries(self, client: LLMClient, messages: list[Message]) -> None:
        """After exhausting retries, re-raises the last exception."""

        async def always_fail(*args, **kwargs):
            raise LLMConnectionError("Down forever")
            yield  # pragma: no cover

        client.stream_chat = always_fail  # type: ignore[assignment]

        with patch("app.services.llm.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
            with pytest.raises(LLMConnectionError, match="Down forever"):
                async for _ in client.stream_chat_with_retry(messages):
                    pass  # pragma: no cover

        # Should have slept max_retries times (3 retries after initial attempt)
        assert mock_sleep.call_count == 3