Merge branch 'feature/llm-extra-body-config'

feat: add extra_body config for model-specific API parameters
Allows passing arbitrary parameters (e.g., enable_thinking, reasoning_effort) to the LLM API request body via config.yaml. This resolves reasoning-only response loops with models like Qwen 3.x without requiring per-model code changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 19:15:39 -05:00 · 2026-03-11 19:15:35 -05:00
3 changed files with 16 additions and 0 deletions
--- a/app/models/config.py
+++ b/app/models/config.py
@@ -2,6 +2,7 @@
 import os
 from pathlib import Path
 from typing import Any
 import yaml
 from pydantic import BaseModel, Field, model_validator
@@ -19,6 +20,10 @@ class LLMConfig(BaseModel):
    max_retries: int = Field(default=3, description="Max retry attempts on transient errors")
    retry_backoff_base: float = Field(default=1.0, description="Base seconds for exponential backoff")
    retry_backoff_max: float = Field(default=30.0, description="Maximum backoff seconds")
    extra_body: dict[str, Any] = Field(
        default_factory=dict,
        description="Extra parameters merged into the API request body (model-specific)",
    )
 class AgentConfig(BaseModel):
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -151,6 +151,10 @@ class LLMClient:
        if tools:
            payload["tools"] = tools
        # Merge model-specific extra parameters (e.g., enable_thinking, reasoning_effort)
        if self._config.extra_body:
            payload.update(self._config.extra_body)
        try:
            async with self._client.stream(
                "POST", self._config.api_path, json=payload
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -10,6 +10,13 @@ llm:
  max_retries: 3
  retry_backoff_base: 1.0
  retry_backoff_max: 30.0
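  # Extra parameters merged into the API request body (model-specific).
  # They are applied last, so a key set here replaces any same-named field
  # the client has already put in the request (e.g. `tools`).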
  # Examples:
  #   Qwen 3.x:  enable_thinking: false
  #   DeepSeek:   enable_thinking: false
  #   OpenAI:     reasoning_effort: "low"
  extra_body:
    enable_thinking: false
 agent:
  max_iterations: 25