Add Phase 7: polish and hardening — retry, truncation, sessions, shutdown

- Config extensions: retry backoff, truncation threshold, session persistence
- LLM retry with exponential backoff + jitter on transient errors (5xx, connection)
- Conversation truncation: drops oldest messages preserving first user + recent N
- Session persistence: auto-save/restore with atomic writes, cleanup of old files
- Graceful shutdown: SIGTERM handler, cancel() on AgentLoop, save-on-exit
- Partial message recovery on mid-stream interruption
- New slash commands: /save, /session
- 18 new tests (5 retry, 5 truncation, 4 session, 4 integration workflows)
- README.md and docs/tools.md documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 10:20:16 -05:00
parent 82846d6236
commit 76ba490aa2
16 changed files with 1550 additions and 12 deletions

148
app/services/session.py Normal file
View File

@@ -0,0 +1,148 @@
"""Session persistence — auto-save and restore conversation state."""
import hashlib
import json
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING
from pydantic import BaseModel, Field
from app.models.config import SessionConfig
from app.utils.logging import get_logger
if TYPE_CHECKING:
    # Needed only by static type checkers; runtime annotations use the
    # quoted name "SessionContext" instead.
    from app.agent.context import SessionContext

# Module-level logger, created with this module's name.
logger = get_logger(__name__)
class SessionData(BaseModel):
    """Schema of a persisted session snapshot.

    Fields without a default are required when validating stored data;
    ``version``, ``messages`` and ``token_usage`` fall back to their defaults.
    """

    # Schema bookkeeping.
    version: int = Field(1, description="Schema version for forward compatibility")

    # Identity and timestamps (timestamps are stored as strings).
    session_id: str = Field(..., description="Unique session identifier")
    created_at: str = Field(..., description="ISO timestamp of session creation")
    updated_at: str = Field(..., description="ISO timestamp of last update")

    # Environment the session ran in.
    model: str = Field(..., description="LLM model name used in session")
    workspace_root: str = Field(..., description="Workspace root path")

    # Conversation payload; each instance gets its own fresh container.
    messages: list[dict] = Field(
        default_factory=list,
        description="Serialized messages",
    )
    token_usage: dict = Field(
        default_factory=dict,
        description="Cumulative token usage",
    )
class SessionManager:
    """Manages session file I/O: save, load, restore, and cleanup.

    Session files are keyed by a hash of the workspace root path so that
    each project directory has its own session history.
    """

    def __init__(self, config: SessionConfig, workspace_root: Path, model: str) -> None:
        """Initialize session manager.

        Args:
            config: Session configuration.
            workspace_root: Absolute path to workspace root.
            model: LLM model name for session metadata.
        """
        self._config = config
        self._workspace_root = workspace_root
        self._model = model
        # First 12 hex characters of the SHA-256 of the workspace path; used
        # as the filename prefix for this workspace's session files.
        self._workspace_hash = hashlib.sha256(str(workspace_root).encode()).hexdigest()[:12]
        self._session_dir = workspace_root / config.session_dir
        # New sessions are named "<workspace hash>_<UTC timestamp>".
        timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
        self._session_id = f"{self._workspace_hash}_{timestamp}"

    @staticmethod
    def _mtime_or_oldest(path: Path) -> float:
        """Return the file's mtime, or -inf if it can no longer be stat'ed."""
        try:
            return path.stat().st_mtime
        except OSError:
            # The file vanished (or became unreadable) after globbing; sort it
            # last instead of aborting the whole lookup.
            return float("-inf")

    def save(self, ctx: "SessionContext") -> Path:
        """Save session state to a JSON file via atomic write.

        The payload is written to a sibling ``.tmp`` file first and then
        swapped into place, so readers never observe a half-written file.

        Args:
            ctx: Session context to persist.

        Returns:
            Path to the saved session file.

        Raises:
            OSError: If the directory cannot be created or the file cannot be
                written or moved into place.
        """
        self._session_dir.mkdir(parents=True, exist_ok=True)
        serialized = ctx.to_serializable()
        data = SessionData(
            session_id=self._session_id,
            created_at=ctx.start_time.isoformat(),
            updated_at=datetime.now(UTC).isoformat(),
            model=self._model,
            workspace_root=str(self._workspace_root),
            messages=serialized["messages"],
            token_usage=serialized["token_usage"],
        )
        file_path = self._session_dir / f"{self._session_id}.json"
        tmp_path = file_path.with_suffix(".tmp")
        try:
            tmp_path.write_text(data.model_dump_json(indent=2), encoding="utf-8")
            # replace() overwrites an existing target on every platform, whereas
            # rename() raises FileExistsError on Windows. The same session file
            # is rewritten on every save, so overwriting is the normal case.
            tmp_path.replace(file_path)
        except OSError:
            # Best-effort removal of the partial temp file; the original
            # error is what the caller needs to see.
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise
        logger.debug("session_saved", path=str(file_path))
        return file_path

    def load_latest(self) -> SessionData | None:
        """Find and load the newest session file for this workspace.

        Candidates are tried from newest to oldest modification time; files
        that cannot be read, parsed, or validated are logged and skipped.

        Returns:
            SessionData if a valid session is found, None otherwise.
        """
        if not self._session_dir.exists():
            return None
        session_files = sorted(
            self._session_dir.glob(f"{self._workspace_hash}_*.json"),
            key=self._mtime_or_oldest,
            reverse=True,
        )
        for path in session_files:
            try:
                raw = json.loads(path.read_text(encoding="utf-8"))
                # model_validate() reports a non-object JSON document (e.g. a
                # list) as a validation error, where SessionData(**raw) would
                # raise an uncaught TypeError.
                return SessionData.model_validate(raw)
            except (json.JSONDecodeError, ValueError, OSError) as e:
                logger.warning("session_load_error", path=str(path), error=str(e))
                continue
        return None

    def restore(self, data: SessionData, ctx: "SessionContext") -> None:
        """Replay session data into a SessionContext.

        Args:
            data: Saved session data to restore.
            ctx: Session context to populate.
        """
        ctx.restore_from({
            "messages": data.messages,
            "token_usage": data.token_usage,
        })
        # Preserve the original session ID for continuity: later save() calls
        # write to the restored session's file rather than a new one.
        self._session_id = data.session_id
        logger.info("session_restored", session_id=data.session_id, messages=len(data.messages))

    def cleanup_old(self) -> int:
        """Delete session files older than max_session_age_hours.

        Age is judged by file modification time. Files that cannot be
        stat'ed or removed are skipped.

        Returns:
            Number of files deleted.
        """
        if not self._session_dir.exists():
            return 0
        cutoff = datetime.now(UTC).timestamp() - (self._config.max_session_age_hours * 3600)
        deleted = 0
        # NOTE: this matches every ``*.json`` file in the session directory,
        # not only files carrying this workspace's hash prefix.
        for path in self._session_dir.glob("*.json"):
            try:
                if path.stat().st_mtime < cutoff:
                    path.unlink()
                    deleted += 1
            except OSError:
                continue
        if deleted > 0:
            logger.info("sessions_cleaned", deleted=deleted)
        return deleted