fix: empty response handling, /no_think model gating, per-model profiles
- Detect empty LLM responses (no content, no tool calls) instead of silently treating them as task completion. Retries once without tools before warning the user.
- Gate the /no_think system message and chat_template_kwargs to Qwen/QwQ models only; sending /no_think to llama3.x caused empty responses.
- Add a model_profiles config section for per-model overrides (token budget, thinking, temperature, max_tokens) matched by name prefix. Applied at startup and on /model switch.
- Update SessionManager on /model switch so session files record the correct model.
- Add an NDJSON fallback in the SSE stream parser for Ollama compatibility.
- Improve the read_file error to suggest find_files on FileNotFoundError.
- Add diagnostic logging for empty streams and empty results.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,23 @@ class AgentMode(StrEnum):
|
||||
AUTO = "auto"
|
||||
|
||||
|
||||
class ModelProfile(BaseModel):
    """Optional per-model settings layered on top of the base configuration.

    Every field defaults to ``None``; a field left at ``None`` carries no
    override for that setting.
    """

    # Conversation token budget to use while this model is active.
    max_conversation_tokens: int | None = Field(
        description="Token budget override for this model's context window",
        default=None,
    )

    # Whether thinking mode should be on or off for this model.
    thinking: bool | None = Field(
        description="Override thinking mode for this model",
        default=None,
    )

    # Sampling temperature to use for this model.
    temperature: float | None = Field(
        description="Override sampling temperature",
        default=None,
    )

    # Upper bound on response tokens for this model.
    max_tokens: int | None = Field(
        description="Override max response tokens",
        default=None,
    )
|
||||
|
||||
|
||||
class LLMConfig(BaseModel):
|
||||
"""LLM backend configuration."""
|
||||
|
||||
@@ -145,6 +162,10 @@ class AppConfig(BaseModel):
|
||||
session: SessionConfig = Field(default_factory=SessionConfig)
|
||||
debug: DebugConfig = Field(default_factory=DebugConfig)
|
||||
skills: SkillsConfig = Field(default_factory=SkillsConfig)
|
||||
model_profiles: dict[str, ModelProfile] = Field(
|
||||
default_factory=dict,
|
||||
description="Per-model overrides keyed by model name prefix",
|
||||
)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def resolve_workspace_root(self) -> "AppConfig":
|
||||
@@ -152,6 +173,39 @@ class AppConfig(BaseModel):
|
||||
self.agent.workspace_root = self.agent.workspace_root.resolve()
|
||||
return self
|
||||
|
||||
def get_model_profile(self, model: str) -> ModelProfile | None:
    """Find the best matching model profile by name prefix.

    Matching is case-insensitive and is done against the full model name,
    tag included. A tag-free key such as "llama3.1" therefore still matches
    "llama3.1:latest", and a key that carries a tag (e.g. "qwen3:8b") can
    target one specific variant. When several keys match, the longest one
    wins ("llama3.1" beats "llama3" for model "llama3.1:latest"); on equal
    length the key that appears first in ``model_profiles`` is kept.

    Args:
        model: Model name, optionally with a ``:tag`` suffix.

    Returns:
        The profile stored under the best matching key, or None if no
        profile matches.
    """
    model_lower = model.lower()

    # Empty keys are skipped: "" is a prefix of every string and would
    # otherwise act as an accidental catch-all.
    matches = [
        key
        for key in self.model_profiles
        if key and model_lower.startswith(key.lower())
    ]
    if not matches:
        return None

    # max() returns the first of several equally long keys, so dict order
    # breaks ties.
    return self.model_profiles[max(matches, key=len)]
|
||||
|
||||
def apply_model_profile(self, model: str) -> ModelProfile | None:
    """Copy the matching profile's overrides onto the live configuration.

    The profile is looked up with ``get_model_profile``. Each profile field
    that is not ``None`` is written to the same-named attribute of
    ``self.agent`` (token budget) or ``self.llm`` (thinking, temperature,
    max_tokens); fields left at ``None`` are not touched.

    Args:
        model: Name of the model whose profile should be applied.

    Returns:
        The applied profile, or None if no profile matched.
    """
    # NOTE(review): values written here are never restored, so overrides
    # from an earlier call appear to persist when a later model has no
    # profile or leaves a field unset - confirm this is intended.
    matched = self.get_model_profile(model)
    if matched is not None:
        # (profile field, config section that owns the same-named setting)
        routing = (
            ("max_conversation_tokens", "agent"),
            ("thinking", "llm"),
            ("temperature", "llm"),
            ("max_tokens", "llm"),
        )
        for field_name, section in routing:
            override = getattr(matched, field_name)
            if override is not None:
                setattr(getattr(self, section), field_name, override)
    return matched
|
||||
|
||||
|
||||
# Default config file location relative to project root
|
||||
_DEFAULT_CONFIG_PATH = Path("config/config.yaml")
|
||||
|
||||
Reference in New Issue
Block a user