fix: empty response handling, /no_think model gating, per-model profiles
- Detect empty LLM responses (no content, no tool calls) instead of silently treating them as task completion. Retries once without tools before warning the user.
- Gate /no_think system message and chat_template_kwargs to Qwen/QwQ models only — sending /no_think to llama3.x caused empty responses.
- Add model_profiles config section for per-model overrides (token budget, thinking, temperature, max_tokens) matched by name prefix. Applied at startup and on /model switch.
- Update SessionManager on /model switch so session files record the correct model.
- Add NDJSON fallback in SSE stream parser for Ollama compatibility.
- Improve read_file error to suggest find_files on FileNotFoundError.
- Add diagnostic logging for empty streams and empty results.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -75,6 +75,9 @@ class SneakyCodeApp(App):
|
||||
"""Initialize agent components after the app is mounted."""
|
||||
setup_logging_for_tui()
|
||||
|
||||
# Apply model profile for the initial model before creating context
|
||||
self._config.apply_model_profile(self._config.llm.model)
|
||||
|
||||
self._ctx = SessionContext(self._config)
|
||||
|
||||
# Create long-lived agent dependencies (reused across turns)
|
||||
@@ -255,8 +258,29 @@ class SneakyCodeApp(App):
|
||||
else:
|
||||
new_model = parts[1].strip()
|
||||
self._config.llm.model = new_model
|
||||
if self._session_mgr:
|
||||
self._session_mgr.update_model(new_model)
|
||||
# Apply model-specific profile overrides
|
||||
profile = self._config.apply_model_profile(new_model)
|
||||
if profile and self._ctx:
|
||||
# Update token budget if the profile overrides it
|
||||
self._ctx.token_counter.budget = self._config.agent.max_conversation_tokens
|
||||
self.query_one(HeaderPanel).update_model(new_model)
|
||||
log.write(Text(f"Switched to model: {new_model}", style="bold green"))
|
||||
header = self.query_one(HeaderPanel)
|
||||
header.update_tokens(
|
||||
self._ctx.estimated_tokens if self._ctx else 0,
|
||||
self._config.agent.max_conversation_tokens,
|
||||
)
|
||||
msg = f"Switched to model: {new_model}"
|
||||
if profile:
|
||||
overrides = []
|
||||
if profile.max_conversation_tokens is not None:
|
||||
overrides.append(f"tokens={profile.max_conversation_tokens:,}")
|
||||
if profile.thinking is not None:
|
||||
overrides.append(f"thinking={'on' if profile.thinking else 'off'}")
|
||||
if overrides:
|
||||
msg += f" ({', '.join(overrides)})"
|
||||
log.write(Text(msg, style="bold green"))
|
||||
elif cmd.split()[0] == "/mode":
|
||||
parts = command.split(maxsplit=1)
|
||||
if len(parts) == 1:
|
||||
|
||||
Reference in New Issue
Block a user