Add Phase 3: LLM integration with Ollama streaming and preflight checks
Wire the REPL to a local Ollama instance via streaming HTTP (SSE). LLMClient handles async streaming chat, StreamHandler renders live Markdown via Rich and accumulates tool call fragments. Startup now runs a preflight check that verifies Ollama is reachable and the configured model is pulled, exiting with a clear message on failure. Also adds .gitignore and updates config to use qwen3.5. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
18
app/main.py
18
app/main.py
@@ -55,6 +55,12 @@ def parse_args() -> argparse.Namespace:
|
|||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
async def _preflight(config: AppConfig) -> None:
    """Run the LLM client's startup check for the configured backend.

    Builds an ``LLMClient`` from ``config.llm``, enters it as an async
    context manager, and awaits its ``preflight_check()``. Nothing is caught
    here: whatever the check raises propagates to the caller.
    """
    llm_client = LLMClient(config.llm)
    async with llm_client as llm:
        await llm.preflight_check()
|
||||||
|
|
||||||
|
|
||||||
async def _run_repl(
|
async def _run_repl(
|
||||||
ctx: SessionContext,
|
ctx: SessionContext,
|
||||||
config: AppConfig,
|
config: AppConfig,
|
||||||
@@ -171,6 +177,18 @@ def main() -> None:
|
|||||||
if args.verbose:
|
if args.verbose:
|
||||||
print_info("Verbose mode enabled")
|
print_info("Verbose mode enabled")
|
||||||
|
|
||||||
|
# Preflight: check Ollama is reachable and model exists
|
||||||
|
try:
|
||||||
|
asyncio.run(_preflight(config))
|
||||||
|
except LLMConnectionError as e:
|
||||||
|
print_error(str(e))
|
||||||
|
sys.exit(1)
|
||||||
|
except LLMError as e:
|
||||||
|
print_error(str(e))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
print_success("Ollama connected, model ready.")
|
||||||
|
|
||||||
# Create session and start REPL
|
# Create session and start REPL
|
||||||
ctx = SessionContext(config)
|
ctx = SessionContext(config)
|
||||||
logger.info("startup_complete")
|
logger.info("startup_complete")
|
||||||
|
|||||||
@@ -59,6 +59,48 @@ class LLMClient:
|
|||||||
timeout=httpx.Timeout(config.timeout, connect=10.0),
|
timeout=httpx.Timeout(config.timeout, connect=10.0),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def preflight_check(self) -> None:
    """Verify the endpoint is reachable and the configured model is available.

    Issues ``GET /api/tags`` and looks for the configured model among the
    returned model names. Parsing of the response body is best-effort: if the
    payload cannot be interpreted, a warning is logged and the check passes
    rather than blocking startup.

    Raises:
        LLMConnectionError: If the endpoint is unreachable or the request
            times out.
        LLMResponseError: If the model is not found or the endpoint returns an error.
    """
    # Check endpoint is reachable.
    try:
        response = await self._client.get("/api/tags")
    except httpx.TimeoutException as e:
        # Must come before the HTTPError handler: TimeoutException is a
        # subclass of httpx.HTTPError, so listing it second made this branch
        # unreachable and reported timeouts as "Is Ollama running?".
        raise LLMConnectionError(
            f"Timed out connecting to {self._config.endpoint}."
        ) from e
    except (httpx.HTTPError, OSError) as e:
        # httpx.HTTPError already covers httpx.ConnectError.
        raise LLMConnectionError(
            f"Cannot reach Ollama at {self._config.endpoint}. Is Ollama running?"
        ) from e

    if response.status_code != 200:
        raise LLMResponseError(
            f"Ollama returned {response.status_code} from /api/tags.",
            status_code=response.status_code,
        )

    # Check model is available.
    try:
        data = response.json()
    except ValueError:
        data = None

    entries = data.get("models") if isinstance(data, dict) else None
    if entries is None and isinstance(data, dict):
        # A missing or null "models" key means "no models pulled".
        entries = []
    if not isinstance(entries, list):
        # Unexpected payload shape: keep the original best-effort behavior
        # (warn and let startup continue) instead of crashing.
        logger.warning("preflight_parse_error", msg="Could not parse /api/tags response")
        return

    # Keep only well-formed entries so a stray null/odd item cannot raise
    # while we compare names.
    available = [
        entry["name"]
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("name"), str)
    ]
    model = self._config.model

    # Match with or without tag suffix (e.g. "qwen3.5" matches "qwen3.5:latest")
    if not any(model == name or model == name.split(":")[0] for name in available):
        available_str = ", ".join(available) if available else "(none)"
        raise LLMResponseError(
            f"Model '{model}' not found. Available models: {available_str}"
        )
|
||||||
|
|
||||||
async def stream_chat(self, messages: list[Message]) -> AsyncIterator[dict]:
|
async def stream_chat(self, messages: list[Message]) -> AsyncIterator[dict]:
|
||||||
"""Stream a chat completion request, yielding parsed SSE chunks.
|
"""Stream a chat completion request, yielding parsed SSE chunks.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user