Compare commits
2 Commits
25fa7dc82b
...
9273d14845
| Author | SHA1 | Date | |
|---|---|---|---|
| | 9273d14845 | | |
| | f0d8ef8f0a | | |
@@ -92,9 +92,29 @@ class AgentLoop:
|
|||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
def _get_messages_with_system_prompt(self) -> list[Message]:
|
def _get_messages_with_system_prompt(self) -> list[Message]:
|
||||||
"""Prepend the system prompt to conversation history."""
|
"""Prepend the system prompt to conversation history.
|
||||||
|
|
||||||
|
When thinking is disabled, injects a /no_think tag into the last user
|
||||||
|
message so Qwen 3.x (and similar) chat templates see it regardless of
|
||||||
|
where tool-result messages fall in the history.
|
||||||
|
"""
|
||||||
system_msg = Message(role="system", content=self._system_prompt)
|
system_msg = Message(role="system", content=self._system_prompt)
|
||||||
return [system_msg] + self._ctx.get_history()
|
history = self._ctx.get_history()
|
||||||
|
|
||||||
|
if not self._config.llm.thinking and history:
|
||||||
|
history = list(history)
|
||||||
|
for i in range(len(history) - 1, -1, -1):
|
||||||
|
if history[i].role == "user":
|
||||||
|
original = history[i]
|
||||||
|
history[i] = Message(
|
||||||
|
role="user",
|
||||||
|
content=(original.content or "") + " /no_think",
|
||||||
|
tool_call_id=original.tool_call_id,
|
||||||
|
name=original.name,
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
return [system_msg] + history
|
||||||
|
|
||||||
async def run_turn(self, user_input: str) -> None:
|
async def run_turn(self, user_input: str) -> None:
|
||||||
"""Execute one full agent turn: add user message, loop until done.
|
"""Execute one full agent turn: add user message, loop until done.
|
||||||
@@ -161,6 +181,10 @@ class AgentLoop:
|
|||||||
reasoning_only_streak += 1
|
reasoning_only_streak += 1
|
||||||
self._ctx.pop_last_message()
|
self._ctx.pop_last_message()
|
||||||
|
|
||||||
|
# When thinking is disabled, reasoning-only is expected model noise.
|
||||||
|
# Nudge immediately and silently to avoid wasting iterations.
|
||||||
|
thinking_disabled = not self._config.llm.thinking
|
||||||
|
|
||||||
# If the last context messages are tool errors, nudge immediately
|
# If the last context messages are tool errors, nudge immediately
|
||||||
# rather than wasting retries — the model is likely confused by the error.
|
# rather than wasting retries — the model is likely confused by the error.
|
||||||
has_recent_tool_error = any(
|
has_recent_tool_error = any(
|
||||||
@@ -168,9 +192,14 @@ class AgentLoop:
|
|||||||
for m in self._ctx.get_history()[-3:]
|
for m in self._ctx.get_history()[-3:]
|
||||||
)
|
)
|
||||||
|
|
||||||
if has_recent_tool_error or reasoning_only_streak >= _MAX_REASONING_RETRIES:
|
should_nudge = (
|
||||||
# Nudge the model by injecting a user hint
|
thinking_disabled
|
||||||
if self._display:
|
or has_recent_tool_error
|
||||||
|
or reasoning_only_streak >= _MAX_REASONING_RETRIES
|
||||||
|
)
|
||||||
|
|
||||||
|
if should_nudge:
|
||||||
|
if not thinking_disabled and self._display:
|
||||||
self._display.write_warning(
|
self._display.write_warning(
|
||||||
f"Model produced reasoning but no response {reasoning_only_streak} times. "
|
f"Model produced reasoning but no response {reasoning_only_streak} times. "
|
||||||
"Nudging model to respond..."
|
"Nudging model to respond..."
|
||||||
|
|||||||
@@ -20,6 +20,10 @@ class LLMConfig(BaseModel):
|
|||||||
max_retries: int = Field(default=3, description="Max retry attempts on transient errors")
|
max_retries: int = Field(default=3, description="Max retry attempts on transient errors")
|
||||||
retry_backoff_base: float = Field(default=1.0, description="Base seconds for exponential backoff")
|
retry_backoff_base: float = Field(default=1.0, description="Base seconds for exponential backoff")
|
||||||
retry_backoff_max: float = Field(default=30.0, description="Maximum backoff seconds")
|
retry_backoff_max: float = Field(default=30.0, description="Maximum backoff seconds")
|
||||||
|
thinking: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description="Enable model thinking/reasoning mode (disable to reduce reasoning-only loops)",
|
||||||
|
)
|
||||||
extra_body: dict[str, Any] = Field(
|
extra_body: dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="Extra parameters merged into the API request body (model-specific)",
|
description="Extra parameters merged into the API request body (model-specific)",
|
||||||
|
|||||||
@@ -151,7 +151,11 @@ class LLMClient:
|
|||||||
if tools:
|
if tools:
|
||||||
payload["tools"] = tools
|
payload["tools"] = tools
|
||||||
|
|
||||||
# Merge model-specific extra parameters (e.g., enable_thinking, reasoning_effort)
|
# When thinking is disabled, inject chat_template_kwargs for backends that support it
|
||||||
|
if not self._config.thinking:
|
||||||
|
payload.setdefault("chat_template_kwargs", {})["enable_thinking"] = False
|
||||||
|
|
||||||
|
# Merge model-specific extra parameters (e.g., reasoning_effort)
|
||||||
if self._config.extra_body:
|
if self._config.extra_body:
|
||||||
payload.update(self._config.extra_body)
|
payload.update(self._config.extra_body)
|
||||||
|
|
||||||
|
|||||||
@@ -10,13 +10,11 @@ llm:
|
|||||||
max_retries: 3
|
max_retries: 3
|
||||||
retry_backoff_base: 1.0
|
retry_backoff_base: 1.0
|
||||||
retry_backoff_max: 30.0
|
retry_backoff_max: 30.0
|
||||||
|
thinking: false # Disable model thinking/reasoning mode (reduces reasoning-only loops)
|
||||||
# Extra parameters merged into the API request body (model-specific).
|
# Extra parameters merged into the API request body (model-specific).
|
||||||
# Examples:
|
# Examples:
|
||||||
# Qwen 3.x: enable_thinking: false
|
|
||||||
# DeepSeek: enable_thinking: false
|
|
||||||
# OpenAI: reasoning_effort: "low"
|
# OpenAI: reasoning_effort: "low"
|
||||||
extra_body:
|
extra_body: {}
|
||||||
enable_thinking: false
|
|
||||||
|
|
||||||
agent:
|
agent:
|
||||||
max_iterations: 25
|
max_iterations: 25
|
||||||
|
|||||||
Reference in New Issue
Block a user