Add Phase 3: LLM integration with Ollama streaming and preflight checks

Wire the REPL to a local Ollama instance via streaming HTTP (SSE).
LLMClient handles async streaming chat, StreamHandler renders live
Markdown via Rich and accumulates tool call fragments. Startup now
runs a preflight check that verifies Ollama is reachable and the
configured model is pulled, exiting with a clear message on failure.
Also adds .gitignore and updates config to use qwen3.5.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 07:27:56 -05:00
parent 5aff2183d6
commit adbb442ce5
2 changed files with 60 additions and 0 deletions

View File

@@ -55,6 +55,12 @@ def parse_args() -> argparse.Namespace:
return parser.parse_args() return parser.parse_args()
async def _preflight(config: AppConfig) -> None:
    """Run the LLM client's preflight check against the configured backend.

    Opens an ``LLMClient`` built from ``config.llm`` as an async context
    manager and awaits its ``preflight_check()``; any exception raised by
    that check propagates to the caller.
    """
    llm_client = LLMClient(config.llm)
    async with llm_client as session:
        await session.preflight_check()
async def _run_repl( async def _run_repl(
ctx: SessionContext, ctx: SessionContext,
config: AppConfig, config: AppConfig,
@@ -171,6 +177,18 @@ def main() -> None:
if args.verbose: if args.verbose:
print_info("Verbose mode enabled") print_info("Verbose mode enabled")
# Preflight: check Ollama is reachable and model exists
try:
asyncio.run(_preflight(config))
except LLMConnectionError as e:
print_error(str(e))
sys.exit(1)
except LLMError as e:
print_error(str(e))
sys.exit(1)
print_success("Ollama connected, model ready.")
# Create session and start REPL # Create session and start REPL
ctx = SessionContext(config) ctx = SessionContext(config)
logger.info("startup_complete") logger.info("startup_complete")

View File

@@ -59,6 +59,48 @@ class LLMClient:
timeout=httpx.Timeout(config.timeout, connect=10.0), timeout=httpx.Timeout(config.timeout, connect=10.0),
) )
async def preflight_check(self) -> None:
    """Verify the endpoint is reachable and the configured model is available.

    Issues ``GET /api/tags`` on the underlying HTTP client and looks for
    ``self._config.model`` among the model names in the response. If the
    response body cannot be parsed, or does not have the expected shape,
    the model check is skipped with a warning instead of failing startup.

    Raises:
        LLMConnectionError: If the request times out or the endpoint is
            otherwise unreachable.
        LLMResponseError: If the endpoint answers with a non-200 status or
            the configured model is not in the returned model list.
    """
    # Check endpoint is reachable.
    # httpx.TimeoutException is a subclass of httpx.HTTPError, so it must be
    # handled first; otherwise the timeout-specific message is unreachable.
    try:
        response = await self._client.get("/api/tags")
    except httpx.TimeoutException as e:
        raise LLMConnectionError(
            f"Timed out connecting to {self._config.endpoint}."
        ) from e
    except (httpx.HTTPError, OSError) as e:
        raise LLMConnectionError(
            f"Cannot reach Ollama at {self._config.endpoint}. Is Ollama running?"
        ) from e

    if response.status_code != 200:
        raise LLMResponseError(
            f"Ollama returned {response.status_code} from /api/tags.",
            status_code=response.status_code,
        )

    # Check model is available (best effort: an unparseable or unexpectedly
    # shaped body skips the check rather than crashing).
    try:
        data = response.json()
    except ValueError:
        logger.warning("preflight_parse_error", msg="Could not parse /api/tags response")
        return

    models = data.get("models") if isinstance(data, dict) else None
    if models is None and isinstance(data, dict) and "models" not in data:
        # A missing key means "no models pulled", same as the original default.
        models = []
    if not isinstance(models, list):
        logger.warning("preflight_parse_error", msg="Could not parse /api/tags response")
        return

    # Ignore malformed entries; a missing or non-string name becomes "".
    available = [
        entry.get("name") if isinstance(entry.get("name"), str) else ""
        for entry in models
        if isinstance(entry, dict)
    ]
    model = self._config.model

    # Match with or without tag suffix (e.g. "qwen3.5" matches "qwen3.5:latest")
    if not any(model == name or model == name.partition(":")[0] for name in available):
        available_str = ", ".join(available) if available else "(none)"
        raise LLMResponseError(
            f"Model '{model}' not found. Available models: {available_str}"
        )
async def stream_chat(self, messages: list[Message]) -> AsyncIterator[dict]: async def stream_chat(self, messages: list[Message]) -> AsyncIterator[dict]:
"""Stream a chat completion request, yielding parsed SSE chunks. """Stream a chat completion request, yielding parsed SSE chunks.