first commit
api/app/ai/__init__.py (new file, 61 lines)
@@ -0,0 +1,61 @@
"""
AI integration module for Code of Conquest.

This module contains clients and utilities for AI-powered features
including narrative generation, quest selection, and NPC dialogue.
"""

from app.ai.replicate_client import (
    ReplicateClient,
    ReplicateResponse,
    ReplicateClientError,
    ReplicateAPIError,
    ReplicateRateLimitError,
    ReplicateTimeoutError,
    ModelType,
)

from app.ai.model_selector import (
    ModelSelector,
    ModelConfig,
    UserTier,
    ContextType,
)

from app.ai.prompt_templates import (
    PromptTemplates,
    PromptTemplateError,
    get_prompt_templates,
    render_prompt,
)

from app.ai.narrative_generator import (
    NarrativeGenerator,
    NarrativeResponse,
    NarrativeGeneratorError,
)

__all__ = [
    # Replicate client
    "ReplicateClient",
    "ReplicateResponse",
    "ReplicateClientError",
    "ReplicateAPIError",
    "ReplicateRateLimitError",
    "ReplicateTimeoutError",
    "ModelType",
    # Model selector
    "ModelSelector",
    "ModelConfig",
    "UserTier",
    "ContextType",
    # Prompt templates
    "PromptTemplates",
    "PromptTemplateError",
    "get_prompt_templates",
    "render_prompt",
    # Narrative generator
    "NarrativeGenerator",
    "NarrativeResponse",
    "NarrativeGeneratorError",
]
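
The package surface above is everything callers need. A minimal usage sketch, assuming REPLICATE_API_TOKEN is configured and the Jinja templates referenced later in this commit are present on disk (both assumptions, not shown here):

    from app.ai import NarrativeGenerator, UserTier

    generator = NarrativeGenerator()
    response = generator.generate_story_response(
        character={"name": "Aldric", "level": 3, "player_class": "Fighter"},
        action="I search the room for hidden doors",
        game_state={"current_location": "Ancient Library"},
        user_tier=UserTier.FREE,
    )
    print(response.narrative)    # generated prose
    print(response.tokens_used)  # estimated token count
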
api/app/ai/model_selector.py (new file, 226 lines)
@@ -0,0 +1,226 @@
"""
Model selector for tier-based AI model routing.

This module provides intelligent model selection based on user subscription tiers
and context types to optimize cost and quality.
"""

from dataclasses import dataclass
from enum import Enum

import structlog

from app.ai.replicate_client import ModelType

logger = structlog.get_logger(__name__)


class UserTier(str, Enum):
    """User subscription tiers."""
    FREE = "free"
    BASIC = "basic"
    PREMIUM = "premium"
    ELITE = "elite"


class ContextType(str, Enum):
    """Types of AI generation contexts."""
    STORY_PROGRESSION = "story_progression"
    COMBAT_NARRATION = "combat_narration"
    QUEST_SELECTION = "quest_selection"
    NPC_DIALOGUE = "npc_dialogue"
    SIMPLE_RESPONSE = "simple_response"


@dataclass
class ModelConfig:
    """Configuration for a selected model."""
    model_type: ModelType
    max_tokens: int
    temperature: float

    @property
    def model(self) -> str:
        """Get the model identifier string."""
        return self.model_type.value


class ModelSelector:
    """
    Selects appropriate AI models based on user tier and context.

    This class implements tier-based routing to ensure:
    - Free users get Llama-3 (no cost)
    - Basic users get Claude Haiku (low cost)
    - Premium users get Claude 3.5 Sonnet (medium cost)
    - Elite users get Claude Sonnet 4 (high cost)

    Context-specific optimizations adjust token limits and temperature
    for different use cases.
    """

    # Tier to model mapping
    TIER_MODELS = {
        UserTier.FREE: ModelType.LLAMA_3_8B,
        UserTier.BASIC: ModelType.CLAUDE_HAIKU,
        UserTier.PREMIUM: ModelType.CLAUDE_SONNET,
        UserTier.ELITE: ModelType.CLAUDE_SONNET_4,
    }

    # Base token limits by tier
    BASE_TOKEN_LIMITS = {
        UserTier.FREE: 256,
        UserTier.BASIC: 512,
        UserTier.PREMIUM: 1024,
        UserTier.ELITE: 2048,
    }

    # Temperature settings by context type
    CONTEXT_TEMPERATURES = {
        ContextType.STORY_PROGRESSION: 0.9,  # Creative, varied
        ContextType.COMBAT_NARRATION: 0.8,   # Exciting but coherent
        ContextType.QUEST_SELECTION: 0.5,    # More deterministic
        ContextType.NPC_DIALOGUE: 0.85,      # Natural conversation
        ContextType.SIMPLE_RESPONSE: 0.7,    # Balanced
    }

    # Token multipliers by context (relative to base)
    CONTEXT_TOKEN_MULTIPLIERS = {
        ContextType.STORY_PROGRESSION: 1.0,  # Full allocation
        ContextType.COMBAT_NARRATION: 0.75,  # Shorter, punchier
        ContextType.QUEST_SELECTION: 0.5,    # Brief selection
        ContextType.NPC_DIALOGUE: 0.75,      # Conversational
        ContextType.SIMPLE_RESPONSE: 0.5,    # Quick responses
    }

    def __init__(self):
        """Initialize the model selector."""
        logger.info("ModelSelector initialized")

    def select_model(
        self,
        user_tier: UserTier,
        context_type: ContextType = ContextType.SIMPLE_RESPONSE
    ) -> ModelConfig:
        """
        Select the appropriate model configuration for a user and context.

        Args:
            user_tier: The user's subscription tier.
            context_type: The type of content being generated.

        Returns:
            ModelConfig with model type, token limit, and temperature.

        Example:
            >>> selector = ModelSelector()
            >>> config = selector.select_model(UserTier.PREMIUM, ContextType.STORY_PROGRESSION)
            >>> config.model_type
            <ModelType.CLAUDE_SONNET: 'anthropic/claude-3.5-sonnet'>
        """
        # Get model for tier
        model_type = self.TIER_MODELS[user_tier]

        # Calculate max tokens
        base_tokens = self.BASE_TOKEN_LIMITS[user_tier]
        multiplier = self.CONTEXT_TOKEN_MULTIPLIERS.get(context_type, 1.0)
        max_tokens = int(base_tokens * multiplier)

        # Get temperature for context
        temperature = self.CONTEXT_TEMPERATURES.get(context_type, 0.7)

        config = ModelConfig(
            model_type=model_type,
            max_tokens=max_tokens,
            temperature=temperature
        )

        logger.debug(
            "Model selected",
            user_tier=user_tier.value,
            context_type=context_type.value,
            model=model_type.value,
            max_tokens=max_tokens,
            temperature=temperature
        )

        return config

    def get_model_for_tier(self, user_tier: UserTier) -> ModelType:
        """
        Get the default model for a user tier.

        Args:
            user_tier: The user's subscription tier.

        Returns:
            The ModelType for this tier.
        """
        return self.TIER_MODELS[user_tier]

    def get_tier_info(self, user_tier: UserTier) -> dict:
        """
        Get information about a tier's AI capabilities.

        Args:
            user_tier: The user's subscription tier.

        Returns:
            Dictionary with tier information.
        """
        model_type = self.TIER_MODELS[user_tier]

        # Map models to friendly names
        model_names = {
            ModelType.LLAMA_3_8B: "Llama 3 8B",
            ModelType.CLAUDE_HAIKU: "Claude 3 Haiku",
            ModelType.CLAUDE_SONNET: "Claude 3.5 Sonnet",
            ModelType.CLAUDE_SONNET_4: "Claude Sonnet 4",
        }

        # Model quality descriptions
        quality_descriptions = {
            ModelType.LLAMA_3_8B: "Good quality, optimized for speed",
            ModelType.CLAUDE_HAIKU: "High quality, fast responses",
            ModelType.CLAUDE_SONNET: "Excellent quality, detailed narratives",
            ModelType.CLAUDE_SONNET_4: "Best quality, most creative and nuanced",
        }

        return {
            "tier": user_tier.value,
            "model": model_type.value,
            "model_name": model_names.get(model_type, model_type.value),
            "base_tokens": self.BASE_TOKEN_LIMITS[user_tier],
            "quality": quality_descriptions.get(model_type, "Standard quality"),
        }

    def estimate_cost_per_request(self, user_tier: UserTier) -> float:
        """
        Estimate the cost per AI request for a tier.

        Args:
            user_tier: The user's subscription tier.

        Returns:
            Estimated cost in USD per request.

        Note:
            These are rough estimates based on typical usage.
            Actual costs depend on input/output tokens.
        """
        # Approximate cost per 1K tokens (input + output average)
        COST_PER_1K_TOKENS = {
            ModelType.LLAMA_3_8B: 0.0,         # Free tier
            ModelType.CLAUDE_HAIKU: 0.001,     # $0.25/1M input, $1.25/1M output
            ModelType.CLAUDE_SONNET: 0.006,    # $3/1M input, $15/1M output
            ModelType.CLAUDE_SONNET_4: 0.015,  # Claude Sonnet 4 pricing
        }

        model_type = self.TIER_MODELS[user_tier]
        base_tokens = self.BASE_TOKEN_LIMITS[user_tier]
        cost_per_1k = COST_PER_1K_TOKENS.get(model_type, 0.0)

        # Estimate: base tokens for output + ~50% for input tokens
        estimated_tokens = base_tokens * 1.5

        return (estimated_tokens / 1000) * cost_per_1k
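
The routing arithmetic is easy to sanity-check by hand: max_tokens is the tier's base limit times the context multiplier, and temperature comes straight from the context table. A quick sketch using only the values defined above:

    from app.ai.model_selector import ModelSelector, UserTier, ContextType

    selector = ModelSelector()

    # PREMIUM base is 1024 tokens; COMBAT_NARRATION multiplies by 0.75 -> 768
    config = selector.select_model(UserTier.PREMIUM, ContextType.COMBAT_NARRATION)
    assert config.max_tokens == 768
    assert config.temperature == 0.8

    # FREE tier: 256 * 1.5 estimated tokens at $0.00/1K -> $0.00 per request
    print(selector.estimate_cost_per_request(UserTier.FREE))      # 0.0
    print(selector.get_tier_info(UserTier.ELITE)["model_name"])   # "Claude Sonnet 4"
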
api/app/ai/narrative_generator.py (new file, 540 lines)
@@ -0,0 +1,540 @@
"""
Narrative generator wrapper for AI content generation.

This module provides a high-level API for generating narrative content
using the appropriate AI models based on user tier and context.
"""

from dataclasses import dataclass
from typing import Any

import structlog

from app.ai.replicate_client import (
    ReplicateClient,
    ReplicateClientError,
)
from app.ai.model_selector import (
    ModelSelector,
    ModelConfig,
    UserTier,
    ContextType,
)
from app.ai.prompt_templates import (
    PromptTemplates,
    PromptTemplateError,
    get_prompt_templates,
)

logger = structlog.get_logger(__name__)


@dataclass
class NarrativeResponse:
    """Response from narrative generation."""
    narrative: str
    tokens_used: int
    tokens_input: int
    tokens_output: int
    model: str
    context_type: str
    generation_time: float


class NarrativeGeneratorError(Exception):
    """Base exception for narrative generator errors."""
    pass


class NarrativeGenerator:
    """
    High-level wrapper for AI narrative generation.

    This class coordinates between the model selector, prompt templates,
    and AI clients to generate narrative content for the game.

    It provides specialized methods for different narrative contexts:
    - Story progression responses
    - Combat narration
    - Quest selection
    - NPC dialogue
    """

    # System prompts for different contexts
    SYSTEM_PROMPTS = {
        ContextType.STORY_PROGRESSION: (
            "You are an expert Dungeon Master running a solo D&D-style adventure. "
            "Create immersive, engaging narratives that respond to player actions. "
            "Be descriptive but concise. Always end with a clear opportunity for the player to act. "
            "CRITICAL: NEVER give the player items, gold, equipment, or any rewards unless the action "
            "instructions explicitly state they should receive them. Only narrate what the template "
            "describes - do not improvise rewards or discoveries."
        ),
        ContextType.COMBAT_NARRATION: (
            "You are a combat narrator for a fantasy RPG. "
            "Describe actions with visceral, cinematic detail. "
            "Keep narration punchy and exciting. Never include game mechanics in prose."
        ),
        ContextType.QUEST_SELECTION: (
            "You are a quest selection system. "
            "Analyze the context and select the most narratively appropriate quest. "
            "Respond only with the quest_id - no explanation."
        ),
        ContextType.NPC_DIALOGUE: (
            "You are a skilled voice actor portraying NPCs in a fantasy world. "
            "Stay in character at all times. Give each NPC a distinct voice and personality. "
            "Provide useful information while maintaining immersion."
        ),
    }

    def __init__(
        self,
        model_selector: ModelSelector | None = None,
        replicate_client: ReplicateClient | None = None,
        prompt_templates: PromptTemplates | None = None
    ):
        """
        Initialize the narrative generator.

        Args:
            model_selector: Optional custom model selector.
            replicate_client: Optional custom Replicate client.
            prompt_templates: Optional custom prompt templates.
        """
        self.model_selector = model_selector or ModelSelector()
        self.replicate_client = replicate_client
        self.prompt_templates = prompt_templates or get_prompt_templates()

        logger.info("NarrativeGenerator initialized")

    def _get_client(self, model_config: ModelConfig) -> ReplicateClient:
        """
        Get or create a Replicate client for the given model configuration.

        Args:
            model_config: The model configuration to use.

        Returns:
            ReplicateClient configured for the specified model.
        """
        # If a client was provided at init, use it
        if self.replicate_client:
            return self.replicate_client

        # Otherwise create a new client with the specified model
        return ReplicateClient(model=model_config.model_type)

    def generate_story_response(
        self,
        character: dict[str, Any],
        action: str,
        game_state: dict[str, Any],
        user_tier: UserTier,
        conversation_history: list[dict[str, Any]] | None = None,
        world_context: str | None = None,
        action_instructions: str | None = None
    ) -> NarrativeResponse:
        """
        Generate a DM response to a player's story action.

        Args:
            character: Character data dictionary with name, level, player_class, stats, etc.
            action: The action the player wants to take.
            game_state: Current game state with location, quests, etc.
            user_tier: The user's subscription tier.
            conversation_history: Optional list of recent conversation entries.
            world_context: Optional additional world information.
            action_instructions: Optional action-specific instructions for the AI from
                the dm_prompt_template field in action_prompts.yaml.

        Returns:
            NarrativeResponse with the generated narrative and metadata.

        Raises:
            NarrativeGeneratorError: If generation fails.

        Example:
            >>> generator = NarrativeGenerator()
            >>> response = generator.generate_story_response(
            ...     character={"name": "Aldric", "level": 3, "player_class": "Fighter", ...},
            ...     action="I search the room for hidden doors",
            ...     game_state={"current_location": "Ancient Library", ...},
            ...     user_tier=UserTier.PREMIUM
            ... )
            >>> print(response.narrative)
        """
        context_type = ContextType.STORY_PROGRESSION

        logger.info(
            "Generating story response",
            character_name=character.get("name"),
            action=action[:50],
            user_tier=user_tier.value,
            location=game_state.get("current_location")
        )

        # Get model configuration for this tier and context
        model_config = self.model_selector.select_model(user_tier, context_type)

        # Build the prompt from template
        try:
            prompt = self.prompt_templates.render(
                "story_action.j2",
                character=character,
                action=action,
                game_state=game_state,
                conversation_history=conversation_history or [],
                world_context=world_context,
                max_tokens=model_config.max_tokens,
                action_instructions=action_instructions
            )
        except PromptTemplateError as e:
            logger.error("Failed to render story prompt", error=str(e))
            raise NarrativeGeneratorError(f"Prompt template error: {e}") from e

        # Debug: Log the full prompt being sent
        logger.debug(
            "Full prompt being sent to AI",
            prompt_length=len(prompt),
            conversation_history_count=len(conversation_history) if conversation_history else 0,
            prompt_preview=prompt[:500] + "..." if len(prompt) > 500 else prompt
        )
        # For detailed debugging, uncomment the line below:
        # print(f"\n{'='*60}\nFULL PROMPT:\n{'='*60}\n{prompt}\n{'='*60}\n")

        # Get system prompt
        system_prompt = self.SYSTEM_PROMPTS[context_type]

        # Generate response
        try:
            client = self._get_client(model_config)
            response = client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=model_config.max_tokens,
                temperature=model_config.temperature,
                model=model_config.model_type
            )
        except ReplicateClientError as e:
            logger.error(
                "AI generation failed",
                error=str(e),
                context_type=context_type.value
            )
            raise NarrativeGeneratorError(f"AI generation failed: {e}") from e

        logger.info(
            "Story response generated",
            tokens_used=response.tokens_used,
            model=response.model,
            generation_time=f"{response.generation_time:.2f}s"
        )

        return NarrativeResponse(
            narrative=response.text,
            tokens_used=response.tokens_used,
            tokens_input=response.tokens_input,
            tokens_output=response.tokens_output,
            model=response.model,
            context_type=context_type.value,
            generation_time=response.generation_time
        )

    def generate_combat_narration(
        self,
        character: dict[str, Any],
        combat_state: dict[str, Any],
        action: str,
        action_result: dict[str, Any],
        user_tier: UserTier,
        is_critical: bool = False,
        is_finishing_blow: bool = False
    ) -> NarrativeResponse:
        """
        Generate narration for a combat action.

        Args:
            character: Character data dictionary.
            combat_state: Current combat state with enemies, round number, etc.
            action: Description of the combat action taken.
            action_result: Result of the action (hit, damage, effects, etc.).
            user_tier: The user's subscription tier.
            is_critical: Whether this was a critical hit/miss.
            is_finishing_blow: Whether this defeats the enemy.

        Returns:
            NarrativeResponse with combat narration.

        Raises:
            NarrativeGeneratorError: If generation fails.

        Example:
            >>> response = generator.generate_combat_narration(
            ...     character={"name": "Aldric", ...},
            ...     combat_state={"round_number": 3, "enemies": [...], ...},
            ...     action="swings their sword at the goblin",
            ...     action_result={"hit": True, "damage": 12, ...},
            ...     user_tier=UserTier.BASIC
            ... )
        """
        context_type = ContextType.COMBAT_NARRATION

        logger.info(
            "Generating combat narration",
            character_name=character.get("name"),
            action=action[:50],
            is_critical=is_critical,
            is_finishing_blow=is_finishing_blow
        )

        # Get model configuration
        model_config = self.model_selector.select_model(user_tier, context_type)

        # Build the prompt
        try:
            prompt = self.prompt_templates.render(
                "combat_action.j2",
                character=character,
                combat_state=combat_state,
                action=action,
                action_result=action_result,
                is_critical=is_critical,
                is_finishing_blow=is_finishing_blow,
                max_tokens=model_config.max_tokens
            )
        except PromptTemplateError as e:
            logger.error("Failed to render combat prompt", error=str(e))
            raise NarrativeGeneratorError(f"Prompt template error: {e}") from e

        # Generate response
        system_prompt = self.SYSTEM_PROMPTS[context_type]

        try:
            client = self._get_client(model_config)
            response = client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=model_config.max_tokens,
                temperature=model_config.temperature,
                model=model_config.model_type
            )
        except ReplicateClientError as e:
            logger.error("Combat narration generation failed", error=str(e))
            raise NarrativeGeneratorError(f"AI generation failed: {e}") from e

        logger.info(
            "Combat narration generated",
            tokens_used=response.tokens_used,
            generation_time=f"{response.generation_time:.2f}s"
        )

        return NarrativeResponse(
            narrative=response.text,
            tokens_used=response.tokens_used,
            tokens_input=response.tokens_input,
            tokens_output=response.tokens_output,
            model=response.model,
            context_type=context_type.value,
            generation_time=response.generation_time
        )

    def generate_quest_selection(
        self,
        character: dict[str, Any],
        eligible_quests: list[dict[str, Any]],
        game_context: dict[str, Any],
        user_tier: UserTier,
        recent_actions: list[str] | None = None
    ) -> str:
        """
        Use AI to select the most contextually appropriate quest.

        Args:
            character: Character data dictionary.
            eligible_quests: List of quest data dictionaries that can be offered.
            game_context: Current game context (location, events, etc.).
            user_tier: The user's subscription tier.
            recent_actions: Optional list of recent player actions.

        Returns:
            The quest_id of the selected quest.

        Raises:
            NarrativeGeneratorError: If generation fails or response is invalid.

        Example:
            >>> quest_id = generator.generate_quest_selection(
            ...     character={"name": "Aldric", "level": 3, ...},
            ...     eligible_quests=[{"quest_id": "goblin_cave", ...}, ...],
            ...     game_context={"current_location": "Tavern", ...},
            ...     user_tier=UserTier.FREE
            ... )
            >>> print(quest_id)  # "goblin_cave"
        """
        context_type = ContextType.QUEST_SELECTION

        logger.info(
            "Generating quest selection",
            character_name=character.get("name"),
            num_eligible_quests=len(eligible_quests),
            location=game_context.get("current_location")
        )

        if not eligible_quests:
            raise NarrativeGeneratorError("No eligible quests provided")

        # Get model configuration
        model_config = self.model_selector.select_model(user_tier, context_type)

        # Build the prompt
        try:
            prompt = self.prompt_templates.render(
                "quest_offering.j2",
                character=character,
                eligible_quests=eligible_quests,
                game_context=game_context,
                recent_actions=recent_actions or []
            )
        except PromptTemplateError as e:
            logger.error("Failed to render quest selection prompt", error=str(e))
            raise NarrativeGeneratorError(f"Prompt template error: {e}") from e

        # Generate response
        system_prompt = self.SYSTEM_PROMPTS[context_type]

        try:
            client = self._get_client(model_config)
            response = client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=model_config.max_tokens,
                temperature=model_config.temperature,
                model=model_config.model_type
            )
        except ReplicateClientError as e:
            logger.error("Quest selection generation failed", error=str(e))
            raise NarrativeGeneratorError(f"AI generation failed: {e}") from e

        # Parse the response to get quest_id
        quest_id = response.text.strip().lower()

        # Validate the response is a valid quest_id
        valid_quest_ids = {q.get("quest_id", "").lower() for q in eligible_quests}
        if quest_id not in valid_quest_ids:
            logger.warning(
                "AI returned invalid quest_id, using first eligible quest",
                returned_id=quest_id,
                valid_ids=list(valid_quest_ids)
            )
            quest_id = eligible_quests[0].get("quest_id", "")

        logger.info(
            "Quest selected",
            quest_id=quest_id,
            tokens_used=response.tokens_used,
            generation_time=f"{response.generation_time:.2f}s"
        )

        return quest_id

    def generate_npc_dialogue(
        self,
        character: dict[str, Any],
        npc: dict[str, Any],
        conversation_topic: str,
        game_state: dict[str, Any],
        user_tier: UserTier,
        npc_relationship: str | None = None,
        previous_dialogue: list[dict[str, Any]] | None = None,
        npc_knowledge: list[str] | None = None
    ) -> NarrativeResponse:
        """
        Generate NPC dialogue in response to player conversation.

        Args:
            character: Character data dictionary.
            npc: NPC data with name, role, personality, etc.
            conversation_topic: What the player said or wants to discuss.
            game_state: Current game state.
            user_tier: The user's subscription tier.
            npc_relationship: Optional description of relationship with NPC.
            previous_dialogue: Optional list of previous exchanges.
            npc_knowledge: Optional list of things this NPC knows about.

        Returns:
            NarrativeResponse with NPC dialogue.

        Raises:
            NarrativeGeneratorError: If generation fails.

        Example:
            >>> response = generator.generate_npc_dialogue(
            ...     character={"name": "Aldric", ...},
            ...     npc={"name": "Old Barkeep", "role": "Tavern Owner", ...},
            ...     conversation_topic="What rumors have you heard lately?",
            ...     game_state={"current_location": "The Rusty Anchor", ...},
            ...     user_tier=UserTier.PREMIUM
            ... )
        """
        context_type = ContextType.NPC_DIALOGUE

        logger.info(
            "Generating NPC dialogue",
            character_name=character.get("name"),
            npc_name=npc.get("name"),
            topic=conversation_topic[:50]
        )

        # Get model configuration
        model_config = self.model_selector.select_model(user_tier, context_type)

        # Build the prompt
        try:
            prompt = self.prompt_templates.render(
                "npc_dialogue.j2",
                character=character,
                npc=npc,
                conversation_topic=conversation_topic,
                game_state=game_state,
                npc_relationship=npc_relationship,
                previous_dialogue=previous_dialogue or [],
                npc_knowledge=npc_knowledge or [],
                max_tokens=model_config.max_tokens
            )
        except PromptTemplateError as e:
            logger.error("Failed to render NPC dialogue prompt", error=str(e))
            raise NarrativeGeneratorError(f"Prompt template error: {e}") from e

        # Generate response
        system_prompt = self.SYSTEM_PROMPTS[context_type]

        try:
            client = self._get_client(model_config)
            response = client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=model_config.max_tokens,
                temperature=model_config.temperature,
                model=model_config.model_type
            )
        except ReplicateClientError as e:
            logger.error("NPC dialogue generation failed", error=str(e))
            raise NarrativeGeneratorError(f"AI generation failed: {e}") from e

        logger.info(
            "NPC dialogue generated",
            npc_name=npc.get("name"),
            tokens_used=response.tokens_used,
            generation_time=f"{response.generation_time:.2f}s"
        )

        return NarrativeResponse(
            narrative=response.text,
            tokens_used=response.tokens_used,
            tokens_input=response.tokens_input,
            tokens_output=response.tokens_output,
            model=response.model,
            context_type=context_type.value,
            generation_time=response.generation_time
        )
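
Because the constructor accepts an injected client, the generator can be exercised without network calls. A hypothetical test sketch (StubClient is illustrative and not part of this commit; it only needs to duck-type generate(), and story_action.j2, referenced above but not shown in this excerpt, must exist on disk):

    from app.ai.replicate_client import ReplicateResponse
    from app.ai.narrative_generator import NarrativeGenerator
    from app.ai.model_selector import UserTier

    class StubClient:
        """Duck-types ReplicateClient.generate(); returns a canned response."""
        def generate(self, **kwargs):
            return ReplicateResponse(
                text="You find a dusty lever behind the bookshelf.",
                tokens_used=20, tokens_input=12, tokens_output=8,
                model="stub", generation_time=0.01,
            )

    generator = NarrativeGenerator(replicate_client=StubClient())
    response = generator.generate_story_response(
        character={"name": "Aldric", "level": 3, "player_class": "Fighter"},
        action="I pull the lever",
        game_state={"current_location": "Ancient Library"},
        user_tier=UserTier.FREE,
    )
    assert response.narrative.startswith("You find")
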
api/app/ai/prompt_templates.py (new file, 318 lines)
@@ -0,0 +1,318 @@
"""
Jinja2 prompt template system for AI generation.

This module provides a templating system for building AI prompts
with consistent structure and context injection.
"""

from pathlib import Path
from typing import Any

import structlog
from jinja2 import Environment, FileSystemLoader, select_autoescape

logger = structlog.get_logger(__name__)


class PromptTemplateError(Exception):
    """Error in prompt template processing."""
    pass


class PromptTemplates:
    """
    Manages Jinja2 templates for AI prompt generation.

    Provides caching, helper functions, and consistent template rendering
    for all AI prompt types.
    """

    # Template directory relative to this module
    TEMPLATE_DIR = Path(__file__).parent / "templates"

    def __init__(self, template_dir: Path | str | None = None):
        """
        Initialize the prompt template system.

        Args:
            template_dir: Optional custom template directory path.
        """
        self.template_dir = Path(template_dir) if template_dir else self.TEMPLATE_DIR

        # Ensure template directory exists
        if not self.template_dir.exists():
            self.template_dir.mkdir(parents=True, exist_ok=True)
            logger.warning(
                "Template directory created",
                path=str(self.template_dir)
            )

        # Set up Jinja2 environment with caching
        self.env = Environment(
            loader=FileSystemLoader(str(self.template_dir)),
            autoescape=select_autoescape(['html', 'xml']),
            trim_blocks=True,
            lstrip_blocks=True,
        )

        # Register custom filters
        self._register_filters()

        # Register custom globals
        self._register_globals()

        logger.info(
            "PromptTemplates initialized",
            template_dir=str(self.template_dir)
        )

    def _register_filters(self):
        """Register custom Jinja2 filters."""
        self.env.filters['format_inventory'] = self._format_inventory
        self.env.filters['format_stats'] = self._format_stats
        self.env.filters['format_skills'] = self._format_skills
        self.env.filters['format_effects'] = self._format_effects
        self.env.filters['truncate_text'] = self._truncate_text
        self.env.filters['format_gold'] = self._format_gold

    def _register_globals(self):
        """Register global functions available in templates."""
        self.env.globals['len'] = len
        self.env.globals['min'] = min
        self.env.globals['max'] = max
        self.env.globals['enumerate'] = enumerate

    # Custom filters
    @staticmethod
    def _format_inventory(items: list[dict], max_items: int = 10) -> str:
        """
        Format inventory items for prompt context.

        Args:
            items: List of item dictionaries with 'name' and 'quantity'.
            max_items: Maximum number of items to display.

        Returns:
            Formatted inventory string.
        """
        if not items:
            return "Empty inventory"

        formatted = []
        for item in items[:max_items]:
            name = item.get('name', 'Unknown')
            qty = item.get('quantity', 1)
            if qty > 1:
                formatted.append(f"{name} (x{qty})")
            else:
                formatted.append(name)

        result = ", ".join(formatted)
        if len(items) > max_items:
            result += f", and {len(items) - max_items} more items"

        return result

    @staticmethod
    def _format_stats(stats: dict) -> str:
        """
        Format character stats for prompt context.

        Args:
            stats: Dictionary of stat names to values.

        Returns:
            Formatted stats string.
        """
        if not stats:
            return "No stats available"

        formatted = []
        for stat, value in stats.items():
            # Convert snake_case to Title Case
            display_name = stat.replace('_', ' ').title()
            formatted.append(f"{display_name}: {value}")

        return ", ".join(formatted)

    @staticmethod
    def _format_skills(skills: list[dict], max_skills: int = 5) -> str:
        """
        Format character skills for prompt context.

        Args:
            skills: List of skill dictionaries with 'name' and 'level'.
            max_skills: Maximum number of skills to display.

        Returns:
            Formatted skills string.
        """
        if not skills:
            return "No skills"

        formatted = []
        for skill in skills[:max_skills]:
            name = skill.get('name', 'Unknown')
            level = skill.get('level', 1)
            formatted.append(f"{name} (Lv.{level})")

        result = ", ".join(formatted)
        if len(skills) > max_skills:
            result += f", and {len(skills) - max_skills} more skills"

        return result

    @staticmethod
    def _format_effects(effects: list[dict]) -> str:
        """
        Format active effects/buffs/debuffs for prompt context.

        Args:
            effects: List of effect dictionaries.

        Returns:
            Formatted effects string.
        """
        if not effects:
            return "No active effects"

        formatted = []
        for effect in effects:
            name = effect.get('name', 'Unknown')
            duration = effect.get('remaining_turns')
            if duration:
                formatted.append(f"{name} ({duration} turns)")
            else:
                formatted.append(name)

        return ", ".join(formatted)

    @staticmethod
    def _truncate_text(text: str, max_length: int = 100) -> str:
        """
        Truncate text to maximum length with ellipsis.

        Args:
            text: Text to truncate.
            max_length: Maximum character length.

        Returns:
            Truncated text with ellipsis if needed.
        """
        if len(text) <= max_length:
            return text
        return text[:max_length - 3] + "..."

    @staticmethod
    def _format_gold(amount: int) -> str:
        """
        Format gold amount with commas.

        Args:
            amount: Gold amount.

        Returns:
            Formatted gold string.
        """
        return f"{amount:,} gold"

    def render(self, template_name: str, **context: Any) -> str:
        """
        Render a template with the given context.

        Args:
            template_name: Name of the template file (e.g., 'story_action.j2').
            **context: Variables to pass to the template.

        Returns:
            Rendered template string.

        Raises:
            PromptTemplateError: If template not found or rendering fails.
        """
        try:
            template = self.env.get_template(template_name)
            rendered = template.render(**context)

            logger.debug(
                "Template rendered",
                template=template_name,
                context_keys=list(context.keys()),
                output_length=len(rendered)
            )

            return rendered.strip()

        except Exception as e:
            logger.error(
                "Template rendering failed",
                template=template_name,
                error=str(e)
            )
            raise PromptTemplateError(f"Failed to render {template_name}: {e}") from e

    def render_string(self, template_string: str, **context: Any) -> str:
        """
        Render a template string directly.

        Args:
            template_string: Jinja2 template string.
            **context: Variables to pass to the template.

        Returns:
            Rendered string.

        Raises:
            PromptTemplateError: If rendering fails.
        """
        try:
            template = self.env.from_string(template_string)
            rendered = template.render(**context)
            return rendered.strip()
        except Exception as e:
            logger.error(
                "String template rendering failed",
                error=str(e)
            )
            raise PromptTemplateError(f"Failed to render template string: {e}") from e

    def get_template_names(self) -> list[str]:
        """
        Get list of available template names.

        Returns:
            List of template file names.
        """
        return self.env.list_templates(extensions=['j2'])


# Global instance for convenience
_templates: PromptTemplates | None = None


def get_prompt_templates() -> PromptTemplates:
    """
    Get the global PromptTemplates instance.

    Returns:
        Singleton PromptTemplates instance.
    """
    global _templates
    if _templates is None:
        _templates = PromptTemplates()
    return _templates


def render_prompt(template_name: str, **context: Any) -> str:
    """
    Convenience function to render a prompt template.

    Args:
        template_name: Name of the template file.
        **context: Variables to pass to the template.

    Returns:
        Rendered template string.
    """
    return get_prompt_templates().render(template_name, **context)
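
The custom filters can be tried without any template files via render_string. A small sketch:

    from app.ai.prompt_templates import PromptTemplates

    templates = PromptTemplates()  # creates the template dir if missing

    out = templates.render_string(
        "Inventory: {{ items | format_inventory }} | Purse: {{ gold | format_gold }}",
        items=[{"name": "Healing Potion", "quantity": 3}, {"name": "Rope"}],
        gold=1250,
    )
    print(out)
    # Inventory: Healing Potion (x3), Rope | Purse: 1,250 gold
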
api/app/ai/replicate_client.py (new file, 450 lines)
@@ -0,0 +1,450 @@
"""
Replicate API client for AI model integration.

This module provides a client for interacting with the Replicate API
to generate text using various models including Llama-3 and Claude models.
All AI generation goes through Replicate for unified billing and management.
"""

import os
import time
from dataclasses import dataclass
from enum import Enum
from typing import Any

import replicate
import structlog

from app.config import get_config

logger = structlog.get_logger(__name__)


class ModelType(str, Enum):
    """Supported model types on Replicate."""
    # Free tier - Llama models
    LLAMA_3_8B = "meta/meta-llama-3-8b-instruct"

    # Paid tiers - Claude models via Replicate
    CLAUDE_HAIKU = "anthropic/claude-3.5-haiku"
    CLAUDE_SONNET = "anthropic/claude-3.5-sonnet"
    CLAUDE_SONNET_4 = "anthropic/claude-4.5-sonnet"


@dataclass
class ReplicateResponse:
    """Response from Replicate API generation."""
    text: str
    tokens_used: int  # Deprecated: use tokens_output instead
    tokens_input: int
    tokens_output: int
    model: str
    generation_time: float


class ReplicateClientError(Exception):
    """Base exception for Replicate client errors."""
    pass


class ReplicateAPIError(ReplicateClientError):
    """Error from Replicate API."""
    pass


class ReplicateRateLimitError(ReplicateClientError):
    """Rate limit exceeded on Replicate API."""
    pass


class ReplicateTimeoutError(ReplicateClientError):
    """Timeout waiting for Replicate response."""
    pass


class ReplicateClient:
    """
    Client for interacting with Replicate API.

    Supports multiple models including Llama-3 and Claude models.
    Implements retry logic with exponential backoff for rate limits.
    """

    # Default model for free tier
    DEFAULT_MODEL = ModelType.LLAMA_3_8B

    # Retry configuration
    MAX_RETRIES = 3
    INITIAL_RETRY_DELAY = 1.0  # seconds

    # Default generation parameters
    DEFAULT_MAX_TOKENS = 256
    DEFAULT_TEMPERATURE = 0.7
    DEFAULT_TOP_P = 0.9
    DEFAULT_TIMEOUT = 30  # seconds

    # Model-specific defaults
    MODEL_DEFAULTS = {
        ModelType.LLAMA_3_8B: {
            "max_tokens": 256,
            "temperature": 0.7,
        },
        ModelType.CLAUDE_HAIKU: {
            "max_tokens": 512,
            "temperature": 0.8,
        },
        ModelType.CLAUDE_SONNET: {
            "max_tokens": 1024,
            "temperature": 0.9,
        },
        ModelType.CLAUDE_SONNET_4: {
            "max_tokens": 2048,
            "temperature": 0.9,
        },
    }

    def __init__(self, api_token: str | None = None, model: str | ModelType | None = None):
        """
        Initialize the Replicate client.

        Args:
            api_token: Replicate API token. If not provided, reads from config.
            model: Model identifier or ModelType enum. Defaults to Llama-3 8B Instruct.

        Raises:
            ReplicateClientError: If API token is not configured.
        """
        config = get_config()

        # Get API token from parameter or config
        self.api_token = api_token or getattr(config, 'replicate_api_token', None)
        if not self.api_token:
            raise ReplicateClientError(
                "Replicate API token not configured. "
                "Set REPLICATE_API_TOKEN in environment or config."
            )

        # Get model from parameter, config, or default
        if model is None:
            model = getattr(config, 'REPLICATE_MODEL', None) or self.DEFAULT_MODEL

        # Convert string to ModelType if needed, or keep as string for custom models
        if isinstance(model, ModelType):
            self.model = model.value
            self.model_type = model
        elif isinstance(model, str):
            # Try to match to ModelType
            self.model = model
            self.model_type = self._get_model_type(model)
        else:
            self.model = self.DEFAULT_MODEL.value
            self.model_type = self.DEFAULT_MODEL

        # Set the API token for the replicate library
        os.environ['REPLICATE_API_TOKEN'] = self.api_token

        logger.info(
            "Replicate client initialized",
            model=self.model,
            model_type=self.model_type.name if self.model_type else "custom"
        )

    def _get_model_type(self, model_string: str) -> ModelType | None:
        """Get ModelType enum from model string."""
        for model_type in ModelType:
            if model_type.value == model_string:
                return model_type
        return None

    def _is_claude_model(self) -> bool:
        """Check if current model is a Claude model."""
        return self.model_type in [
            ModelType.CLAUDE_HAIKU,
            ModelType.CLAUDE_SONNET,
            ModelType.CLAUDE_SONNET_4
        ]

    def generate(
        self,
        prompt: str,
        system_prompt: str | None = None,
        max_tokens: int | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        timeout: int | None = None,
        model: str | ModelType | None = None
    ) -> ReplicateResponse:
        """
        Generate text using the configured model.

        Args:
            prompt: The user prompt to send to the model.
            system_prompt: Optional system prompt for context setting.
            max_tokens: Maximum tokens to generate. Uses model defaults if not specified.
            temperature: Sampling temperature (0.0-1.0). Uses model defaults if not specified.
            top_p: Top-p sampling parameter. Defaults to 0.9.
            timeout: Timeout in seconds. Defaults to 30.
            model: Override the default model for this request.

        Returns:
            ReplicateResponse with generated text and metadata.

        Raises:
            ReplicateAPIError: For API errors.
            ReplicateRateLimitError: When rate limited.
            ReplicateTimeoutError: When request times out.
        """
        # Handle model override
        if model:
            if isinstance(model, ModelType):
                current_model = model.value
                current_model_type = model
            else:
                current_model = model
                current_model_type = self._get_model_type(model)
        else:
            current_model = self.model
            current_model_type = self.model_type

        # Get model-specific defaults
        model_defaults = self.MODEL_DEFAULTS.get(current_model_type, {})

        # Apply defaults (parameter > model default > class default)
        max_tokens = max_tokens or model_defaults.get("max_tokens", self.DEFAULT_MAX_TOKENS)
        if temperature is None:  # explicit 0.0 is a valid setting; don't overwrite it
            temperature = model_defaults.get("temperature", self.DEFAULT_TEMPERATURE)
        top_p = top_p or self.DEFAULT_TOP_P
        timeout = timeout or self.DEFAULT_TIMEOUT

        # Format prompt based on model type
        is_claude = current_model_type in [
            ModelType.CLAUDE_HAIKU,
            ModelType.CLAUDE_SONNET,
            ModelType.CLAUDE_SONNET_4
        ]

        if is_claude:
            input_params = self._build_claude_params(
                prompt, system_prompt, max_tokens, temperature, top_p
            )
        else:
            # Llama-style formatting
            formatted_prompt = self._format_llama_prompt(prompt, system_prompt)
            input_params = {
                "prompt": formatted_prompt,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_p": top_p,
            }

        logger.debug(
            "Generating text with Replicate",
            model=current_model,
            max_tokens=max_tokens,
            temperature=temperature,
            is_claude=is_claude
        )

        # Execute with retry logic
        start_time = time.time()
        output = self._execute_with_retry(current_model, input_params, timeout)
        generation_time = time.time() - start_time

        # Parse response
        text = self._parse_response(output)

        # Estimate tokens (rough approximation: ~4 chars per token)
        # Calculate input tokens from the actual prompt sent
        prompt_text = input_params.get("prompt", "")
        system_text = input_params.get("system_prompt", "")
        total_input_text = prompt_text + system_text
        tokens_input = len(total_input_text) // 4

        # Calculate output tokens from response
        tokens_output = len(text) // 4

        # Total for backwards compatibility
        tokens_used = tokens_input + tokens_output

        logger.info(
            "Replicate generation complete",
            model=current_model,
            tokens_input=tokens_input,
            tokens_output=tokens_output,
            tokens_used=tokens_used,
            generation_time=f"{generation_time:.2f}s",
            response_length=len(text)
        )

        return ReplicateResponse(
            text=text.strip(),
            tokens_used=tokens_used,
            tokens_input=tokens_input,
            tokens_output=tokens_output,
            model=current_model,
            generation_time=generation_time
        )

    def _build_claude_params(
        self,
        prompt: str,
        system_prompt: str | None,
        max_tokens: int,
        temperature: float,
        top_p: float
    ) -> dict[str, Any]:
        """
        Build input parameters for Claude models on Replicate.

        Args:
            prompt: User prompt.
            system_prompt: Optional system prompt.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            top_p: Top-p sampling parameter.

        Returns:
            Dictionary of input parameters for Replicate API.
        """
        params = {
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
        }

        if system_prompt:
            params["system_prompt"] = system_prompt

        return params

    def _format_llama_prompt(self, prompt: str, system_prompt: str | None = None) -> str:
        """
        Format prompt for Llama-3 Instruct model.

        Llama-3 Instruct uses a specific format with special tokens.

        Args:
            prompt: User prompt.
            system_prompt: Optional system prompt.

        Returns:
            Formatted prompt string.
        """
        parts = []

        if system_prompt:
            parts.append(f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>")
        else:
            parts.append("<|begin_of_text|>")

        parts.append(f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|>")
        parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")

        return "".join(parts)

    def _parse_response(self, output: Any) -> str:
        """
        Parse response from Replicate API.

        Handles both streaming iterators and direct string responses.

        Args:
            output: Raw output from Replicate API.

        Returns:
            Parsed text string.
        """
        if hasattr(output, '__iter__') and not isinstance(output, str):
            return "".join(output)
        return str(output)

    def _execute_with_retry(
        self,
        model: str,
        input_params: dict[str, Any],
        timeout: int
    ) -> Any:
        """
        Execute Replicate API call with retry logic.

        Implements exponential backoff for rate limit errors.

        Args:
            model: Model identifier to run.
            input_params: Input parameters for the model.
            timeout: Timeout in seconds.

        Returns:
            API response output.

        Raises:
            ReplicateAPIError: For API errors after retries.
            ReplicateRateLimitError: When rate limit persists after retries.
            ReplicateTimeoutError: When request times out.
        """
        # NOTE: `timeout` is currently not enforced here; timeouts are detected
        # from the exception message below.
        last_error = None
        retry_delay = self.INITIAL_RETRY_DELAY

        for attempt in range(self.MAX_RETRIES):
            try:
                output = replicate.run(
                    model,
                    input=input_params
                )
                return output

            except replicate.exceptions.ReplicateError as e:
                error_message = str(e).lower()

                if "rate limit" in error_message or "429" in error_message:
                    last_error = ReplicateRateLimitError(f"Rate limited: {e}")

                    if attempt < self.MAX_RETRIES - 1:
                        logger.warning(
                            "Rate limited, retrying",
                            attempt=attempt + 1,
                            retry_delay=retry_delay
                        )
                        time.sleep(retry_delay)
                        retry_delay *= 2
                        continue
                    else:
                        raise last_error from e

                elif "timeout" in error_message:
                    raise ReplicateTimeoutError(f"Request timed out: {e}") from e

                else:
                    raise ReplicateAPIError(f"API error: {e}") from e

            except Exception as e:
                error_message = str(e).lower()

                if "timeout" in error_message:
                    raise ReplicateTimeoutError(f"Request timed out: {e}") from e

                raise ReplicateAPIError(f"Unexpected error: {e}") from e

        if last_error:
            raise last_error
        raise ReplicateAPIError("Max retries exceeded")

    def validate_api_key(self) -> bool:
        """
        Validate that the API key is valid.

        Makes a minimal API call to check credentials.

        Returns:
            True if API key is valid, False otherwise.
        """
        try:
            model_name = self.model.split(':')[0]
            model = replicate.models.get(model_name)
            return model is not None
        except Exception as e:
            logger.warning(
                "API key validation failed",
                error=str(e)
            )
            return False
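
For reference, _format_llama_prompt assembles the Llama-3 Instruct special-token stream. A sketch of the exact string it produces (the API token below is a placeholder, and the method is private, so this is for illustration only):

    from app.ai.replicate_client import ReplicateClient, ModelType

    client = ReplicateClient(api_token="r8_placeholder", model=ModelType.LLAMA_3_8B)
    formatted = client._format_llama_prompt(
        "Describe the tavern.",
        system_prompt="You are a concise narrator.",
    )
    # <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    #
    # You are a concise narrator.<|eot_id|><|start_header_id|>user<|end_header_id|>
    #
    # Describe the tavern.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
    #
    # (newlines shown; the parts are joined with no separator)
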
api/app/ai/response_parser.py (new file, 160 lines)
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
Response parser for AI narrative responses.
|
||||
|
||||
This module handles AI response parsing. Game state changes (items, gold, XP)
|
||||
are now handled exclusively through predetermined dice check outcomes in
|
||||
action templates, not through AI-generated JSON.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ItemGrant:
|
||||
"""
|
||||
Represents an item granted by the AI during gameplay.
|
||||
|
||||
The AI can grant items in two ways:
|
||||
1. By item_id - References an existing item from game data
|
||||
2. By name/type/description - Creates a generic item
|
||||
"""
|
||||
item_id: Optional[str] = None # For existing items
|
||||
name: Optional[str] = None # For generic items
|
||||
item_type: Optional[str] = None # consumable, weapon, armor, quest_item
|
||||
description: Optional[str] = None
|
||||
value: int = 0
|
||||
quantity: int = 1
|
||||
|
||||
def is_existing_item(self) -> bool:
|
||||
"""Check if this references an existing item by ID."""
|
||||
return self.item_id is not None
|
||||
|
||||
def is_generic_item(self) -> bool:
|
||||
"""Check if this is a generic item created by the AI."""
|
||||
return self.item_id is None and self.name is not None
|
||||
|
||||
|
||||
@dataclass
|
||||
class GameStateChanges:
|
||||
"""
|
||||
Structured game state changes extracted from AI response.
|
||||
|
||||
These changes are validated and applied to the character after
|
||||
the AI generates its narrative response.
|
||||
"""
|
||||
items_given: list[ItemGrant] = field(default_factory=list)
|
||||
items_taken: list[str] = field(default_factory=list) # item_ids to remove
|
||||
gold_given: int = 0
|
||||
gold_taken: int = 0
|
||||
experience_given: int = 0
|
||||
quest_offered: Optional[str] = None # quest_id
|
||||
quest_completed: Optional[str] = None # quest_id
|
||||
location_change: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedAIResponse:
|
||||
"""
|
||||
Complete parsed AI response with narrative and game state changes.
|
||||
|
||||
Attributes:
|
||||
narrative: The narrative text to display to the player
|
||||
game_changes: Structured game state changes to apply
|
||||
raw_response: The original unparsed response from AI
|
||||
parse_success: Whether parsing succeeded
|
||||
parse_errors: Any errors encountered during parsing
|
||||
"""
|
||||
narrative: str
|
||||
game_changes: GameStateChanges
|
||||
raw_response: str
|
||||
parse_success: bool = True
|
||||
parse_errors: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class ResponseParserError(Exception):
|
||||
"""Exception raised when response parsing fails critically."""
|
||||
pass
|
||||
|
||||
|
||||
def parse_ai_response(response_text: str) -> ParsedAIResponse:
|
||||
"""
|
||||
Parse an AI response to extract the narrative text.
|
||||
|
||||
Game state changes (items, gold, XP) are now handled exclusively through
|
||||
predetermined dice check outcomes, not through AI-generated structured data.
|
||||
|
||||
Args:
|
||||
response_text: The raw AI response text
|
||||
|
||||
Returns:
|
||||
ParsedAIResponse with narrative (game_changes will be empty)
|
||||
"""
|
||||
logger.debug("Parsing AI response", response_length=len(response_text))
|
||||
|
||||
# Return the full response as narrative
|
||||
# Game state changes come from predetermined check_outcomes, not AI
|
||||
return ParsedAIResponse(
|
||||
narrative=response_text.strip(),
|
||||
game_changes=GameStateChanges(),
|
||||
raw_response=response_text,
|
||||
parse_success=True,
|
||||
parse_errors=[]
|
||||
)
|
||||
|
||||
|
||||
def _parse_game_actions(data: dict[str, Any]) -> GameStateChanges:
    """
    Parse the game actions dictionary into a GameStateChanges object.

    Args:
        data: Dictionary from parsed JSON

    Returns:
        GameStateChanges object with parsed data
    """
    changes = GameStateChanges()

    # Parse items_given
    if "items_given" in data and data["items_given"]:
        for item_data in data["items_given"]:
            if isinstance(item_data, dict):
                item_grant = ItemGrant(
                    item_id=item_data.get("item_id"),
                    name=item_data.get("name"),
                    item_type=item_data.get("type"),
                    description=item_data.get("description"),
                    value=item_data.get("value", 0),
                    quantity=item_data.get("quantity", 1)
                )
                changes.items_given.append(item_grant)
            elif isinstance(item_data, str):
                # Simple string format - treat as item_id
                changes.items_given.append(ItemGrant(item_id=item_data))

    # Parse items_taken
    if "items_taken" in data and data["items_taken"]:
        changes.items_taken = [
            item_id for item_id in data["items_taken"]
            if isinstance(item_id, str)
        ]

    # Parse gold changes ("or 0" guards against explicit null values in the JSON)
    changes.gold_given = int(data.get("gold_given") or 0)
    changes.gold_taken = int(data.get("gold_taken") or 0)

    # Parse experience
    changes.experience_given = int(data.get("experience_given") or 0)

    # Parse quest changes
    changes.quest_offered = data.get("quest_offered")
    changes.quest_completed = data.get("quest_completed")

    # Parse location change
    changes.location_change = data.get("location_change")

    return changes
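
For reference, the legacy structured payload this helper accepts looks like the following; the field names come from the parser above, while the concrete values are invented.

legacy_actions = {
    "items_given": [
        {"item_id": "potion_healing", "name": "Healing Potion", "quantity": 2},
        "rusty_key",  # bare strings are treated as item_ids
    ],
    "items_taken": ["torn_map"],
    "gold_given": 25,
    "experience_given": 50,
    "quest_offered": "quest_goblin_cave",
}

changes = _parse_game_actions(legacy_actions)
# changes.items_given   -> [ItemGrant(item_id="potion_healing", ...), ItemGrant(item_id="rusty_key")]
# changes.gold_given    -> 25
# changes.quest_offered -> "quest_goblin_cave"
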
81
api/app/ai/templates/combat_action.j2
Normal file
@@ -0,0 +1,81 @@
{#
Combat Action Prompt Template
Used for narrating combat actions and outcomes.

Required context:
- character: Character object
- combat_state: Current combat information
- action: The combat action being taken
- action_result: Outcome of the action (damage, effects, etc.)

Optional context:
- is_critical: Whether this was a critical hit/miss
- is_finishing_blow: Whether this defeats the enemy
#}
You are the Dungeon Master narrating an exciting combat encounter.

## Combatants
**{{ character.name }}** (Level {{ character.level }} {{ character.player_class }})
- Health: {{ character.current_hp }}/{{ character.max_hp }} HP
{% if character.effects %}
- Active Effects: {{ character.effects | format_effects }}
{% endif %}

**vs**

{% for enemy in combat_state.enemies %}
**{{ enemy.name }}**
- Health: {{ enemy.current_hp }}/{{ enemy.max_hp }} HP
{% if enemy.effects %}
- Status: {{ enemy.effects | format_effects }}
{% endif %}
{% endfor %}

## Combat Round {{ combat_state.round_number }}
Turn: {{ combat_state.current_turn }}

## Action Taken
{{ character.name }} {{ action }}

## Action Result
{% if action_result.hit %}
- **Hit!** {{ action_result.damage }} damage dealt
{% if is_critical %}
- **CRITICAL HIT!**
{% endif %}
{% if action_result.effects_applied %}
- Applied: {{ action_result.effects_applied | join(', ') }}
{% endif %}
{% else %}
- **Miss!** The attack fails to connect
{% endif %}

{% if is_finishing_blow %}
**{{ action_result.target }} has been defeated!**
{% endif %}

## Your Task
Narrate this combat action:
1. Describe the action with visceral, cinematic detail
2. Show the result - the impact, the enemy's reaction
{% if is_finishing_blow %}
3. Describe the enemy's defeat dramatically
{% else %}
3. Hint at the enemy's remaining threat or weakness
{% endif %}

{% if max_tokens %}
**IMPORTANT: Your response must be under {{ (max_tokens * 0.6) | int }} words (approximately {{ max_tokens }} tokens). Complete all sentences - do not get cut off mid-sentence.**
{% if max_tokens <= 150 %}
Keep it to 2-3 punchy sentences.
{% elif max_tokens <= 300 %}
Keep it to 1 short paragraph.
{% else %}
Keep it to 1-2 exciting paragraphs.
{% endif %}
{% endif %}

Keep it punchy and action-packed. Use active voice and dynamic verbs.
Don't include game mechanics in the narrative - just the story.

Respond only with the narrative - no dice rolls or damage numbers.
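
To exercise this template outside the app, a minimal rendering sketch follows. It assumes the template directory path, registers a stand-in for the custom format_effects filter (the real one is provided by the prompt-template module), and uses invented context values throughout.

from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("api/app/ai/templates"))
# Stand-in filter; the production implementation lives in prompt_templates.
env.filters["format_effects"] = lambda effects: ", ".join(
    e["name"] if isinstance(e, dict) else str(e) for e in effects
)

prompt = env.get_template("combat_action.j2").render(
    character={"name": "Kael", "level": 4, "player_class": "Rogue",
               "current_hp": 22, "max_hp": 30, "effects": []},
    combat_state={"round_number": 2, "current_turn": "Kael",
                  "enemies": [{"name": "Goblin Raider", "current_hp": 5,
                               "max_hp": 12, "effects": []}]},
    action="lunges with a poisoned dagger",
    action_result={"hit": True, "damage": 5, "effects_applied": ["Poisoned"],
                   "target": "Goblin Raider"},
    is_critical=False,
    is_finishing_blow=True,
    max_tokens=150,
)
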
138
api/app/ai/templates/npc_dialogue.j2
Normal file
@@ -0,0 +1,138 @@
{#
NPC Dialogue Prompt Template - Enhanced with persistent NPC data.
Used for generating contextual NPC conversations with rich personality.

Required context:
- character: Player character information (name, level, player_class)
- npc: NPC information with personality, appearance, dialogue_hooks
- conversation_topic: What the player wants to discuss
- game_state: Current game state

Optional context:
- npc_knowledge: List of information the NPC can share
- revealed_secrets: Secrets being revealed this conversation
- interaction_count: Number of times player has talked to this NPC
- relationship_level: 0-100 relationship score (50 is neutral)
- previous_dialogue: Previous exchanges with this NPC
#}
You are roleplaying as an NPC in a fantasy world, having a conversation with a player character.

## The NPC
**{{ npc.name }}** - {{ npc.role }}

{% if npc.appearance %}
- **Appearance:** {{ npc.appearance if npc.appearance is string else (npc.appearance.brief if npc.appearance.brief else npc.appearance) }}
{% endif %}

{% if npc.personality %}
{% if npc.personality.traits %}
- **Personality Traits:** {{ npc.personality.traits | join(', ') }}
{% elif npc.personality is string %}
- **Personality:** {{ npc.personality }}
{% endif %}
{% if npc.personality.speech_style %}
- **Speaking Style:** {{ npc.personality.speech_style }}
{% endif %}
{% if npc.personality.quirks %}
- **Quirks:** {{ npc.personality.quirks | join('; ') }}
{% endif %}
{% endif %}

{% if npc.dialogue_hooks and npc.dialogue_hooks.greeting %}
- **Typical Greeting:** "{{ npc.dialogue_hooks.greeting }}"
{% endif %}

{% if npc.goals %}
- **Current Goals:** {{ npc.goals }}
{% endif %}

## The Player Character
**{{ character.name }}** - Level {{ character.level }} {{ character.player_class }}
{% if interaction_count and interaction_count > 1 %}
- **Familiarity:** This is conversation #{{ interaction_count }} - the NPC recognizes {{ character.name }}
{% endif %}
{% if relationship_level is number %}
{% if relationship_level >= 80 %}
- **Relationship:** Close friend ({{ relationship_level }}/100) - treats player warmly
{% elif relationship_level >= 60 %}
- **Relationship:** Friendly acquaintance ({{ relationship_level }}/100) - helpful and open
{% elif relationship_level >= 40 %}
- **Relationship:** Neutral ({{ relationship_level }}/100) - professional but guarded
{% elif relationship_level >= 20 %}
- **Relationship:** Distrustful ({{ relationship_level }}/100) - wary and curt
{% else %}
- **Relationship:** Hostile ({{ relationship_level }}/100) - dismissive or antagonistic
{% endif %}
{% endif %}

## Current Setting
- **Location:** {{ game_state.current_location }}
{% if game_state.time_of_day %}
- **Time:** {{ game_state.time_of_day }}
{% endif %}
{% if game_state.active_quests %}
- **Player's Active Quests:** {{ game_state.active_quests | length }}
{% endif %}

{% if npc_knowledge %}
## Knowledge the NPC May Share
The NPC knows about the following (share naturally as relevant to conversation):
{% for info in npc_knowledge %}
- {{ info }}
{% endfor %}
{% endif %}

{% if revealed_secrets %}
## IMPORTANT: Secrets to Reveal This Conversation
Based on the player's relationship with this NPC, naturally reveal the following:
{% for secret in revealed_secrets %}
- {{ secret }}
{% endfor %}
Work these into the dialogue naturally - don't dump all information at once.
Make it feel earned, like the NPC is opening up to someone they trust.
{% endif %}

{% if npc.relationships %}
## NPC Relationships (for context)
{% for rel in npc.relationships %}
- Feels {{ rel.attitude }} toward {{ rel.npc_id }}{% if rel.reason %} ({{ rel.reason }}){% endif %}

{% endfor %}
{% endif %}

{% if previous_dialogue %}
## Previous Conversation
{% for exchange in previous_dialogue[-2:] %}
- **{{ character.name }}:** {{ exchange.player_line | truncate_text(100) }}
- **{{ npc.name }}:** {{ exchange.npc_response | truncate_text(100) }}
{% endfor %}
{% endif %}

## Player Says
"{{ conversation_topic }}"

## Your Task
Respond as {{ npc.name }} in character. Generate dialogue that:
1. **Matches the NPC's personality and speech style exactly** - use their quirks, accent, and manner
2. **Acknowledges the relationship** - be warmer to friends, cooler to strangers
3. **Shares relevant knowledge naturally** - don't info-dump, weave it into conversation
4. **Reveals secrets if specified** - make it feel like earned trust, not random exposition
5. **Feels alive and memorable** - give this NPC a distinct voice

{% if max_tokens %}
**IMPORTANT: Your response must be under {{ (max_tokens * 0.6) | int }} words (approximately {{ max_tokens }} tokens). Complete all sentences - do not get cut off mid-sentence.**
{% if max_tokens <= 150 %}
Keep it to 1-2 sentences of dialogue.
{% elif max_tokens <= 300 %}
Keep it to 2-3 sentences of dialogue.
{% else %}
Keep it to 2-4 sentences of dialogue.
{% endif %}
{% else %}
Keep the response to 2-4 sentences of dialogue.
{% endif %}

You may include brief action/emotion tags in *asterisks* to show gestures and expressions.

Respond only as the NPC - no narration or out-of-character text.
Format: *action/emotion* "Dialogue goes here."
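
The context this template consumes can be assembled as a plain dict; the keys mirror the comment block at the top of the template, and every value below is invented for illustration.

npc_dialogue_context = {
    "character": {"name": "Kael", "level": 4, "player_class": "Rogue"},
    "npc": {
        "name": "Mira",
        "role": "Herbalist",
        "appearance": "A wiry woman with ink-stained fingers",
        "personality": {
            "traits": ["curious", "guarded"],
            "speech_style": "clipped, precise sentences",
            "quirks": ["taps the counter while thinking"],
        },
        "dialogue_hooks": {"greeting": "Back again, are we?"},
    },
    "conversation_topic": "Ask about the ruined watchtower",
    "game_state": {"current_location": "Thornfield Market", "time_of_day": "dusk"},
    "relationship_level": 65,  # falls in the "friendly acquaintance" band
    "interaction_count": 3,
}
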
61
api/app/ai/templates/quest_offering.j2
Normal file
@@ -0,0 +1,61 @@
{#
Quest Offering Prompt Template
Used for AI to select the most contextually appropriate quest.

Required context:
- eligible_quests: List of quest objects that can be offered
- game_context: Current game state information
- character: Character information

Optional context:
- recent_actions: Recent player actions
#}
You are selecting the most appropriate quest to offer to a player based on their current context.

## Player Character
**{{ character.name }}** - Level {{ character.level }} {{ character.player_class }}
{% if character.completed_quests %}
- Completed Quests: {{ character.completed_quests | length }}
{% endif %}

## Current Context
- **Location:** {{ game_context.current_location }} ({{ game_context.location_type }})
{% if recent_actions %}
- **Recent Actions:**
{% for action in recent_actions[-3:] %}
  - {{ action }}
{% endfor %}
{% endif %}
{% if game_context.active_quests %}
- **Active Quests:** {{ game_context.active_quests | length }} in progress
{% endif %}
{% if game_context.world_events %}
- **Current Events:** {{ game_context.world_events | join(', ') }}
{% endif %}

## Available Quests
{% for quest in eligible_quests %}
### {{ loop.index }}. {{ quest.name }}
- **Quest ID:** {{ quest.quest_id }}
- **Difficulty:** {{ quest.difficulty }}
- **Quest Giver:** {{ quest.quest_giver }}
- **Description:** {{ quest.description | truncate_text(200) }}
- **Narrative Hooks:**
{% for hook in quest.narrative_hooks %}
  - {{ hook }}
{% endfor %}
{% endfor %}

## Your Task
Select the ONE quest that best fits the current narrative context.

Consider:
1. Which quest's narrative hooks connect best to the player's recent actions?
2. Which quest giver makes sense for this location?
3. Which difficulty is appropriate for the character's level and situation?
4. Which quest would feel most natural to discover right now?

Respond with ONLY the quest_id of your selection on a single line.
Example response: quest_goblin_cave

Do not include any explanation - just the quest_id.
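
Because the model is instructed to return a bare quest_id, the caller should still validate the reply. The defensive sketch below, including its fallback policy, is an assumption rather than code from this commit.

def pick_quest(reply: str, eligible_quests: list[dict]) -> str:
    """Return a valid quest_id from the model reply, with a safe fallback."""
    lines = reply.strip().splitlines()
    candidate = lines[0].strip() if lines else ""
    valid_ids = {q["quest_id"] for q in eligible_quests}
    if candidate in valid_ids:
        return candidate
    # Reply contained prose or an unknown id - fall back deterministically.
    return eligible_quests[0]["quest_id"]
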
112
api/app/ai/templates/story_action.j2
Normal file
@@ -0,0 +1,112 @@
{#
Story Action Prompt Template
Used for generating DM responses to player story actions.

Required context:
- character: Character object with name, level, player_class, stats
- game_state: GameState with current_location, location_type, active_quests
- action: String describing the player's action
- conversation_history: List of recent conversation entries (optional)

Optional context:
- custom_topic: For specific queries
- world_context: Additional world information
#}
You are the Dungeon Master for {{ character.name }}, a level {{ character.level }} {{ character.player_class }}.

## Character Status
- **Health:** {{ character.current_hp }}/{{ character.max_hp }} HP
- **Stats:** {{ character.stats | format_stats }}
{% if character.skills %}
- **Skills:** {{ character.skills | format_skills }}
{% endif %}
{% if character.effects %}
- **Active Effects:** {{ character.effects | format_effects }}
{% endif %}

## Current Situation
- **Location:** {{ game_state.current_location }} ({{ game_state.location_type }})
{% if game_state.discovered_locations %}
- **Known Locations:** {{ game_state.discovered_locations | join(', ') }}
{% endif %}
{% if game_state.active_quests %}
- **Active Quests:** {{ game_state.active_quests | length }} quest(s) in progress
{% endif %}
{% if game_state.time_of_day %}
- **Time:** {{ game_state.time_of_day }}
{% endif %}

{% if location %}
## Location Details
- **Place:** {{ location.name }}
- **Type:** {{ location.type if location.type else game_state.location_type }}
{% if location.description %}
- **Description:** {{ location.description | truncate_text(300) }}
{% endif %}
{% if location.ambient %}
- **Atmosphere:** {{ location.ambient | truncate_text(200) }}
{% endif %}
{% if location.lore %}
- **Lore:** {{ location.lore | truncate_text(150) }}
{% endif %}
{% endif %}

{% if npcs_present %}
## NPCs Present
{% for npc in npcs_present %}
- **{{ npc.name }}** ({{ npc.role }}): {{ npc.appearance if npc.appearance is string else (npc.appearance.brief if npc.appearance.brief else 'No description') }}
{% endfor %}
These NPCs are available for conversation. Include them naturally in the scene if relevant.
{% endif %}

{% if conversation_history %}
## Recent History
{% for entry in conversation_history[-3:] %}
- **Turn {{ entry.turn }}:** {{ entry.action }}
  > {{ entry.dm_response }}
{% endfor %}
{% endif %}

## Player Action
{{ action }}

{% if action_instructions %}
## Action-Specific Instructions
{{ action_instructions }}
{% endif %}

## Your Task
Generate a narrative response that:
1. Acknowledges the player's action and describes their attempt
2. Describes what happens as a result, including any discoveries or consequences
3. Sets up the next decision point or opportunity for action

{% if max_tokens %}
**IMPORTANT: Your response must be under {{ (max_tokens * 0.7) | int }} words (approximately {{ max_tokens }} tokens). Complete all sentences - do not get cut off mid-sentence.**
{% if max_tokens <= 150 %}
Keep it to 1 short paragraph (2-3 sentences).
{% elif max_tokens <= 300 %}
Keep it to 1 paragraph (4-5 sentences).
{% elif max_tokens <= 600 %}
Keep it to 1-2 paragraphs.
{% else %}
Keep it to 2-3 paragraphs.
{% endif %}
{% endif %}

Keep the tone immersive and engaging. Use vivid descriptions but stay concise.
If the action involves NPCs, give them personality and realistic reactions.
If the action could fail or succeed, describe the outcome based on the character's abilities.

**CRITICAL RULES - Player Agency:**
- NEVER make decisions for the player (no auto-purchasing, no automatic commitments)
- NEVER complete transactions without explicit player consent
- NEVER take items or spend gold without the player choosing to do so
- Present options, choices, or discoveries - then let the player decide
- End with clear options or a question about what they want to do next
- If items/services have costs, always state prices and ask if they want to proceed

{% if world_context %}
## World Context
{{ world_context }}
{% endif %}
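
The word caps in the max_tokens block come from a rough tokens-to-words heuristic: 0.7 words per token here, versus 0.6 in the combat and dialogue templates. A worked example of the arithmetic, with a hypothetical budget:

max_tokens = 300
word_limit = int(max_tokens * 0.7)  # 210 words -> the "1 paragraph (4-5 sentences)" band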