refactor: remove dead assistant_chat system, consolidate image helpers

The old /assistant/chats/* CRUD endpoints and the chat functions in assistant_chat_service were unused — the frontend exclusively uses /ai-sessions/{id}/chat (unified_chat_service) for all chat operations.

Removed:
- Chat CRUD endpoints (create, list, get, send, update, delete, bulk delete, conclude)
- assistant_chat_service: create_chat, send_message, generate_conclusion_summary, CONCLUSION_SYSTEM_PROMPT
- Frontend: assistantChatApi chat methods, dead types (AssistantChat, AssistantChatMessage, ConcludeChatRequest, etc.)

Kept:
- /assistant/retention endpoints (used by ChatRetentionSettingsPage)
- Shared AI infrastructure (_call_ai, _call_anthropic_cached, ASSISTANT_SYSTEM_PROMPT, _auto_title) — imported by unified_chat_service

Moved:
- fetch_upload_images + resize_image_for_vision → storage_service.py (shared location, not tied to the dead endpoint)

Also added an "Image Analysis" section to the system prompt so Claude knows to describe attached screenshots.

Net -650 lines (141 additions, 791 deletions), almost all of it dead code.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 05:28:06 +00:00
parent 36ca830481
commit 8e7f13d2f8
8 changed files with 141 additions and 791 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -283,8 +283,8 @@ async def send_chat_message(
    # Fetch attached images from S3 (if any)
    images = None
    if data.upload_ids:
-        from app.api.endpoints.assistant_chat import _fetch_upload_images
+        from app.services.storage_service import fetch_upload_images
-        images = await _fetch_upload_images(data.upload_ids, account_id, db) or None
+        images = await fetch_upload_images(data.upload_ids, account_id, db) or None
    try:
        ai_content, suggested_flows, session = await unified_chat_service.send_chat_message(
--- a/backend/app/api/endpoints/assistant_chat.py
+++ b/backend/app/api/endpoints/assistant_chat.py
@@ -1,453 +1,29 @@
-"""Standalone AI assistant chat endpoints.
+"""Chat retention settings endpoints.
  POST   /assistant/chats              — Create new chat
  GET    /assistant/chats              — List chats (paginated, newest first)
  GET    /assistant/chats/{id}         — Get chat with messages
  POST   /assistant/chats/{id}/messages — Send message
  PATCH  /assistant/chats/{id}         — Update title, pin/unpin
  DELETE /assistant/chats/{id}         — Delete single chat
  DELETE /assistant/chats              — Bulk delete (older_than_days query param)
  GET    /assistant/retention          — Get account retention settings
  PATCH  /assistant/retention          — Update retention settings (owner only)
 """
 import base64
 import logging
 from datetime import datetime, timezone, timedelta
 from typing import Annotated, Any, Optional
 from uuid import UUID
-from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
+Note: Chat CRUD endpoints were removed — the frontend uses /ai-sessions/{id}/chat
-from sqlalchemy import select, delete, func
+(unified_chat_service) for all chat operations. The /assistant prefix is kept for
 the retention settings to avoid a frontend URL change.
 """
 from typing import Annotated, Optional
 from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from app.core.rate_limit import limiter
+from app.api.deps import get_current_active_user, get_db
 from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
 from app.core.config import settings
 from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
 from app.models.user import User
 from app.models.account import Account
 from app.models.assistant_chat import AssistantChat
 from app.models.file_upload import FileUpload
 from app.schemas.assistant_chat import (
    ChatCreateRequest,
    ChatMessageRequest,
    ChatMessageResponse,
    ChatListResponse,
    ChatDetailResponse,
    ChatUpdateRequest,
    RetentionSettingsResponse,
    RetentionSettingsUpdate,
    ConcludeChatRequest,
    ConcludeChatResponse,
 )
 from app.schemas.copilot import SuggestedFlow
 from app.services import assistant_chat_service
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
 VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
 # Claude vision costs: (width × height) / 750 tokens per image.
 # Claude auto-resizes images >1568px on the longest edge.
 # We resize server-side to avoid sending multi-MB base64 payloads over the wire.
 MAX_IMAGE_DIMENSION = 1568  # Claude's max efficient resolution
 MAX_IMAGES_PER_MESSAGE = 3  # Cap to control token budget
 def _resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
    """Resize image to fit within Claude's efficient vision bounds.
    Returns (resized_bytes, media_type). Converts PNG screenshots to JPEG
    when it reduces size significantly (screenshots are often huge PNGs).
    """
    try:
        from PIL import Image
        from io import BytesIO
        img = Image.open(BytesIO(file_data))
        w, h = img.size
        # Only resize if larger than Claude's max efficient dimension
        if max(w, h) > MAX_IMAGE_DIMENSION:
            ratio = MAX_IMAGE_DIMENSION / max(w, h)
            new_w, new_h = int(w * ratio), int(h * ratio)
            img = img.resize((new_w, new_h), Image.LANCZOS)
        # Convert RGBA (common in screenshots) to RGB for JPEG
        out_type = content_type
        if img.mode in ("RGBA", "P") and content_type == "image/png":
            img = img.convert("RGB")
            out_type = "image/jpeg"
        buf = BytesIO()
        if out_type == "image/jpeg":
            img.save(buf, format="JPEG", quality=85, optimize=True)
        else:
            img.save(buf, format=img.format or "PNG", optimize=True)
        result = buf.getvalue()
        # Only use resized version if it's actually smaller
        if len(result) < len(file_data):
            return result, out_type
        return file_data, content_type
    except ImportError:
        # Pillow not installed — send original (Claude auto-resizes)
        logger.debug("Pillow not available, sending original image to Claude")
        return file_data, content_type
    except Exception:
        logger.warning("Image resize failed, sending original")
        return file_data, content_type
 async def _fetch_upload_images(
    upload_ids: list[UUID],
    account_id: UUID,
    db: AsyncSession,
 ) -> list[dict[str, Any]]:
    """Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision.
    Resizes images server-side to reduce network payload and applies a per-message
    cap to control token budget (~1,600 tokens per full-res image).
    """
    if not upload_ids or not settings.STORAGE_ENDPOINT:
        return []
    from app.services import storage_service
    # Cap the number of images to limit token cost
    capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
    if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
        logger.info(
            "Capped images from %d to %d for token budget",
            len(upload_ids), MAX_IMAGES_PER_MESSAGE,
        )
    result = await db.execute(
        select(FileUpload).where(
            FileUpload.id.in_(capped_ids),
            FileUpload.account_id == account_id,
            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
        )
    )
    uploads = result.scalars().all()
    images: list[dict[str, Any]] = []
    for upload in uploads:
        try:
            file_data = storage_service.download_file(upload.storage_key)
            resized_data, media_type = _resize_image_for_vision(
                file_data, upload.content_type
            )
            images.append({
                "media_type": media_type,
                "data": base64.b64encode(resized_data).decode("ascii"),
            })
        except Exception:
            logger.warning("Failed to fetch upload %s from S3", upload.id)
    return images
 def _require_ai_enabled() -> None:
    if not settings.ai_enabled:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
        )
@router.post("/chats", response_model=ChatDetailResponse, status_code=201)
@limiter.limit("10/minute")
 async def create_chat(
    request: Request,
    data: ChatCreateRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
 ):
    """Create a new empty chat conversation."""
    chat = await assistant_chat_service.create_chat(
        user_id=current_user.id,
        account_id=current_user.account_id,
        db=db,
    )
    await db.commit()
    return ChatDetailResponse.model_validate(chat)
@router.get("/chats", response_model=list[ChatListResponse])
 async def list_chats(
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    page: int = Query(1, ge=1),
    size: int = Query(20, ge=1, le=100),
 ):
    """List user's chat conversations (newest first, pinned on top)."""
    offset = (page - 1) * size
    result = await db.execute(
        select(AssistantChat)
        .where(AssistantChat.user_id == current_user.id)
        .order_by(AssistantChat.pinned.desc(), AssistantChat.updated_at.desc())
        .offset(offset)
        .limit(size)
    )
    chats = result.scalars().all()
    return [ChatListResponse.model_validate(c) for c in chats]
@router.get("/chats/{chat_id}", response_model=ChatDetailResponse)
 async def get_chat(
    chat_id: UUID,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
 ):
    """Get a chat with full message history."""
    result = await db.execute(
        select(AssistantChat).where(
            AssistantChat.id == chat_id,
            AssistantChat.user_id == current_user.id,
        )
    )
    chat = result.scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
    return ChatDetailResponse.model_validate(chat)
@router.post("/chats/{chat_id}/messages", response_model=ChatMessageResponse)
@limiter.limit("10/minute")
 async def post_message(
    request: Request,
    chat_id: UUID,
    data: ChatMessageRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
 ):
    """Send a message and get AI response."""
    _require_ai_enabled()
    allowed, quota_status = await check_ai_quota(
        user_id=current_user.id,
        account_id=current_user.account_id,
        db=db,
        billing_anchor=current_user.ai_billing_cycle_anchor_at,
        is_super_admin=current_user.is_super_admin,
    )
    if not allowed:
        reset_key = "daily_reset_at" if quota_status.get("deny_reason") == "daily" else "monthly_reset_at"
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "message": f"AI limit exceeded ({quota_status['deny_reason']})",
                "reset_at": quota_status.get(reset_key),
                "quota": quota_status,
            },
        )
    plan = await get_user_plan(current_user.account_id, db)
    # Capture scalar fields before the try block — after db.rollback()
    # the ORM objects are expired and accessing attributes triggers a
    # lazy load, which crashes in async context (MissingGreenlet).
    user_id = current_user.id
    account_id = current_user.account_id
    # Fetch attached images from S3 (if any)
    images = await _fetch_upload_images(data.upload_ids, account_id, db)
    try:
        ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
            chat_id=chat_id,
            user_id=user_id,
            account_id=account_id,
            message=data.message,
            db=db,
            images=images or None,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
    except Exception as e:
        logger.exception("Assistant chat message failed: %s", e)
        await db.rollback()
        await record_ai_usage(
            user_id=user_id,
            account_id=account_id,
            conversation_id=None,
            generation_type="assistant_message",
            tier=plan,
            input_tokens=0,
            output_tokens=0,
            estimated_cost=0,
            succeeded=False,
            counts_toward_quota=False,
            error_code=type(e).__name__,
            extra_data={"assistant_chat_id": str(chat_id)},
            db=db,
        )
        await db.commit()
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"AI provider error ({type(e).__name__}). Please try again.",
        )
    await record_ai_usage(
        user_id=user_id,
        account_id=account_id,
        conversation_id=None,
        generation_type="assistant_message",
        tier=plan,
        input_tokens=chat.total_input_tokens,
        output_tokens=chat.total_output_tokens,
        estimated_cost=(
            chat.total_input_tokens * 1.0 / 1_000_000
            + chat.total_output_tokens * 5.0 / 1_000_000
        ),
        succeeded=True,
        counts_toward_quota=False,
        error_code=None,
        extra_data={"assistant_chat_id": str(chat_id)},
        db=db,
    )
    await db.commit()
    return ChatMessageResponse(
        content=ai_content,
        suggested_flows=[SuggestedFlow.model_validate(sf) for sf in suggested_flows],
    )
@router.post("/chats/{chat_id}/conclude", response_model=ConcludeChatResponse)
@limiter.limit("10/minute")
 async def conclude_chat(
    request: Request,
    chat_id: UUID,
    data: ConcludeChatRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
 ):
    """Conclude a chat session and generate ticket-ready summary."""
    _require_ai_enabled()
    result = await db.execute(
        select(AssistantChat).where(
            AssistantChat.id == chat_id,
            AssistantChat.user_id == current_user.id,
        )
    )
    chat = result.scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
    if chat.concluded_at:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Chat already concluded",
        )
    if chat.message_count < 2:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Chat must have at least one exchange before concluding",
        )
    try:
        summary = await assistant_chat_service.generate_conclusion_summary(
            chat=chat,
            outcome=data.outcome,
            notes=data.notes,
        )
    except Exception as e:
        logger.exception("Failed to generate conclusion summary: %s", e)
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail="Failed to generate summary. Please try again.",
        )
    now = datetime.now(timezone.utc)
    chat.conclusion_outcome = data.outcome
    chat.conclusion_summary = summary
    chat.concluded_at = now
    await db.commit()
    return ConcludeChatResponse(
        summary=summary,
        outcome=data.outcome,
        concluded_at=now,
    )
@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse)
 async def update_chat(
    chat_id: UUID,
    data: ChatUpdateRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
 ):
    """Update chat title or pin/unpin."""
    result = await db.execute(
        select(AssistantChat).where(
            AssistantChat.id == chat_id,
            AssistantChat.user_id == current_user.id,
        )
    )
    chat = result.scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
    if data.title is not None:
        chat.title = data.title
    if data.pinned is not None:
        chat.pinned = data.pinned
    await db.commit()
    return ChatDetailResponse.model_validate(chat)
@router.delete("/chats/{chat_id}", status_code=204)
 async def delete_chat(
    chat_id: UUID,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
 ):
    """Delete a single chat."""
    result = await db.execute(
        select(AssistantChat).where(
            AssistantChat.id == chat_id,
            AssistantChat.user_id == current_user.id,
        )
    )
    chat = result.scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
    await db.delete(chat)
    await db.commit()
@router.delete("/chats", status_code=204)
 async def bulk_delete_chats(
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    older_than_days: int = Query(..., ge=1),
 ):
    """Bulk delete chats older than N days (skips pinned)."""
    cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
    await db.execute(
        delete(AssistantChat).where(
            AssistantChat.user_id == current_user.id,
            AssistantChat.pinned == False,  # noqa: E712
            AssistantChat.updated_at < cutoff,
        )
    )
    await db.commit()
@router.get("/retention", response_model=RetentionSettingsResponse)
 async def get_retention_settings(
    current_user: Annotated[User, Depends(get_current_active_user)],
--- a/backend/app/schemas/assistant_chat.py
+++ b/backend/app/schemas/assistant_chat.py
@@ -1,54 +1,11 @@
-"""Pydantic schemas for standalone AI assistant chat."""
+"""Pydantic schemas for chat retention settings.
-from typing import Optional, Any, Literal
+
-from uuid import UUID
+Chat CRUD schemas were removed — the active chat system uses
-from datetime import datetime
+schemas from ai_session.py via the /ai-sessions endpoints.
 """
 from typing import Optional
 from pydantic import BaseModel, Field
 from app.schemas.copilot import SuggestedFlow
 class ChatCreateRequest(BaseModel):
    """Empty body — creates a new blank conversation."""
    pass
 class ChatMessageRequest(BaseModel):
    message: str = Field(..., min_length=1, max_length=8000)
    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)
 class ChatMessageResponse(BaseModel):
    content: str
    suggested_flows: list[SuggestedFlow] = []
 class ChatListResponse(BaseModel):
    id: UUID
    title: str
    message_count: int
    pinned: bool
    created_at: datetime
    updated_at: datetime
    model_config = {"from_attributes": True}
 class ChatDetailResponse(BaseModel):
    id: UUID
    title: str
    messages: list[dict[str, Any]]
    message_count: int
    pinned: bool
    created_at: datetime
    updated_at: datetime
    model_config = {"from_attributes": True}
 class ChatUpdateRequest(BaseModel):
    title: Optional[str] = Field(None, min_length=1, max_length=255)
    pinned: Optional[bool] = None
 class RetentionSettingsResponse(BaseModel):
    chat_retention_days: Optional[int]
@@ -58,14 +15,3 @@ class RetentionSettingsResponse(BaseModel):
 class RetentionSettingsUpdate(BaseModel):
    chat_retention_days: Optional[int] = Field(None, ge=1, le=365)
    chat_retention_max_count: Optional[int] = Field(None, ge=10, le=10000)
 class ConcludeChatRequest(BaseModel):
    outcome: Literal["resolved", "escalated", "paused"]
    notes: Optional[str] = Field(None, max_length=2000)
 class ConcludeChatResponse(BaseModel):
    summary: str
    outcome: str
    concluded_at: datetime
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -1,7 +1,7 @@
-"""Standalone AI assistant chat service with RAG context.
+"""Shared AI chat infrastructure — system prompt, prompt caching, and AI calling.
-Provides persistent conversation history for general IT questions
+Used by unified_chat_service (the active chat backend). The assistant_chat
-with semantic search over the team's flow library.
+CRUD endpoints were removed — only retention settings remain on that router.
 Uses Anthropic prompt caching to reduce cost on multi-turn conversations:
 - The static system prompt is cached (ephemeral, 5-min TTL)
@@ -13,14 +13,8 @@ for real-time documentation lookups (controlled by ENABLE_MCP_MICROSOFT_LEARN).
 """
 import logging
 from typing import Any
 from uuid import UUID
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.config import settings
 from app.models.assistant_chat import AssistantChat
 from app.services.rag_service import search as rag_search, build_rag_context, extract_suggested_flows
 logger = logging.getLogger(__name__)
@@ -74,6 +68,11 @@ You have access to Microsoft's official documentation via Microsoft Learn. Use i
 - No team flow covers the topic and vendor-specific detail would help
 Do NOT use Microsoft Learn for every question — only when official docs add real value.
 ## Image Analysis
 When an image is attached, analyze it carefully. Screenshots of error messages, \
 config panels, event viewer logs, and network diagrams are common in MSP work. \
 Describe what you see and use the visual information to inform your troubleshooting advice.
 ## Boundaries
 - Stay focused on IT infrastructure, systems administration, and MSP operations.
 - If a question is clearly outside your domain, say so briefly and redirect.
@@ -273,199 +272,3 @@ def _auto_title(message: str) -> str:
    if len(message) > 100:
        title = title.rsplit(" ", 1)[0] + "..."
    return title
 CONCLUSION_SYSTEM_PROMPT = """\
 You are a ticket documentation specialist for MSP (Managed Service Provider) teams. \
 Your job is to transform an AI troubleshooting conversation into clean, professional \
 ticket notes that can be pasted directly into a PSA/ticketing system (ConnectWise, \
 Autotask, HaloPSA, etc.).
 ## Output Format
 Generate a structured summary using this exact format:
 **Subject:** [One-line summary of the issue]
 **Outcome:** {outcome_label}
 **Problem Description:**
 [2-3 sentence summary of the original problem]
 **Steps Taken:**
 1. [Step] — [Result/finding]
 2. [Step] — [Result/finding]
 (list all troubleshooting steps from the conversation)
 **Current Status:**
 [Where things stand now — what was resolved, what remains]
 {notes_section}
 **Key Findings:**
 - [Important discovery or configuration detail]
 - [Any relevant error codes, settings, or values identified]
 {resume_section}
 ## Rules
 - Be concise but thorough — these notes will be read by another engineer
 - Include specific technical details (commands run, error messages, config values)
 - Use plain text formatting (no HTML) — bold with ** is fine
 - Do NOT include conversational filler, greetings, or meta-commentary
 - Extract ALL actionable steps from the conversation, in chronological order
 - If the conversation identified root cause, state it clearly
 """
 async def generate_conclusion_summary(
    chat: "AssistantChat",
    outcome: str,
    notes: str | None = None,
 ) -> str:
    """Generate a ticket-ready summary from a concluded chat conversation."""
    outcome_labels = {
        "resolved": "Resolved",
        "escalated": "Escalated",
        "paused": "Paused — To Be Continued",
    }
    outcome_label = outcome_labels.get(outcome, outcome)
    notes_section = ""
    if notes:
        notes_section = f"\n**Engineer Notes:**\n{notes}\n"
    resume_section = ""
    if outcome == "paused":
        resume_section = (
            "\n**Next Steps (for resumption):**\n"
            "- [What needs to happen next]\n"
            "- [Any pending actions or follow-ups]\n"
        )
    elif outcome == "escalated":
        resume_section = (
            "\n**Escalation Details:**\n"
            "- [Reason for escalation]\n"
            "- [Recommended next steps for receiving team/tier]\n"
        )
    # Build the conversation transcript for the AI
    transcript_lines = []
    for msg in chat.messages:
        role_label = "ENGINEER" if msg["role"] == "user" else "AI ASSISTANT"
        transcript_lines.append(f"[{role_label}]: {msg['content']}")
    transcript = "\n\n".join(transcript_lines)
    prompt = (
        f"Outcome: {outcome_label}\n\n"
        f"{'Engineer Notes: ' + notes if notes else '(No additional notes)'}\n\n"
        f"--- CONVERSATION TRANSCRIPT ---\n\n{transcript}\n\n"
        f"--- END TRANSCRIPT ---\n\n"
        f"Generate the ticket notes now. Replace all placeholder brackets with actual content from the conversation. "
        f"The notes_section placeholder should be: {notes_section or '(omit this section)'}\n"
        f"The resume_section placeholder should be filled based on the conversation context."
    )
    system_with_vars = CONCLUSION_SYSTEM_PROMPT.replace(
        "{outcome_label}", outcome_label
    ).replace(
        "{notes_section}", notes_section or ""
    ).replace(
        "{resume_section}", resume_section
    )
    content, _, _ = await _call_ai(
        system_base=system_with_vars,
        rag_context="",
        history=[],
        new_message=prompt,
        max_tokens=2048,
    )
    return content
 async def create_chat(
    user_id: UUID,
    account_id: UUID,
    db: AsyncSession,
 ) -> AssistantChat:
    """Create a new empty chat."""
    chat = AssistantChat(
        user_id=user_id,
        account_id=account_id,
        messages=[],
    )
    db.add(chat)
    await db.flush()
    return chat
 async def send_message(
    chat_id: UUID,
    user_id: UUID,
    account_id: UUID,
    message: str,
    db: AsyncSession,
    images: list[dict[str, Any]] | None = None,
 ) -> tuple[str, list[dict[str, Any]], AssistantChat]:
    """Send a user message and get AI response.
    Args:
        images: Optional list of {"media_type": str, "data": str (base64)}
                for vision content attached to this message.
    Returns (ai_content, suggested_flows, chat).
    """
    result = await db.execute(
        select(AssistantChat).where(
            AssistantChat.id == chat_id,
            AssistantChat.user_id == user_id,
        )
    )
    chat = result.scalar_one_or_none()
    if not chat:
        raise ValueError("Chat not found")
    # Auto-title from first message
    if chat.message_count == 0:
        chat.title = _auto_title(message)
    # RAG search
    rag_results = await rag_search(
        query=message,
        account_id=account_id,
        db=db,
        limit=8,
    )
    rag_context = build_rag_context(rag_results)
    # Build messages for AI
    ai_messages: list[dict[str, Any]] = []
    for msg in chat.messages:
        if msg["role"] in ("user", "assistant"):
            ai_messages.append({"role": msg["role"], "content": msg["content"]})
    # Call AI with prompt caching (Anthropic) or generic provider
    ai_content, input_tokens, output_tokens = await _call_ai(
        system_base=ASSISTANT_SYSTEM_PROMPT,
        rag_context=rag_context,
        history=ai_messages,
        new_message=message,
        images=images,
    )
    # Update chat
    msgs = list(chat.messages)
    msgs.append({"role": "user", "content": message})
    msgs.append({"role": "assistant", "content": ai_content})
    chat.messages = msgs
    chat.message_count += 2
    chat.total_input_tokens += input_tokens
    chat.total_output_tokens += output_tokens
    suggested_flows = extract_suggested_flows(rag_results)
    return ai_content, suggested_flows, chat
--- a/backend/app/services/storage_service.py
+++ b/backend/app/services/storage_service.py
@@ -1,7 +1,10 @@
 """S3-compatible object storage service for file uploads."""
 import base64
 import logging
 import uuid
 from io import BytesIO
 from typing import Any
 from uuid import UUID
 import boto3
 from botocore.config import Config as BotoConfig
@@ -92,3 +95,107 @@ async def delete_file(storage_key: str) -> None:
        client.delete_object(Bucket=settings.STORAGE_BUCKET_NAME, Key=storage_key)
    except ClientError:
        logger.warning(f"Failed to delete S3 object: {storage_key}")
 # ── Vision helpers (resize + fetch for AI) ─────────────────────
 # Claude vision costs: (width × height) / 750 tokens per image.
 # Claude auto-resizes images >1568px on the longest edge.
 # We resize server-side to avoid sending multi-MB base64 payloads over the wire.
 MAX_IMAGE_DIMENSION = 1568  # Claude's max efficient resolution
 MAX_IMAGES_PER_MESSAGE = 3  # Cap to control token budget
 def resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
     """Re-encode an image so it fits within MAX_IMAGE_DIMENSION on its longest edge.

     Images larger than MAX_IMAGE_DIMENSION are downscaled (aspect ratio kept).
     PNGs in RGBA or palette mode are additionally converted to RGB JPEG.
     The re-encoded bytes are only used when they are smaller than the input;
     otherwise the original bytes and content type are returned untouched.

     Args:
         file_data: Raw encoded image bytes.
         content_type: Media type the caller stored for ``file_data``.

     Returns:
         ``(image_bytes, media_type)``. ``media_type`` always describes the
         encoding of the returned bytes.

     This function never raises: if Pillow is missing or processing fails,
     the original ``(file_data, content_type)`` pair is returned.
     """
     # Pillow encoder name for each media type this function may emit.
     save_formats = {
         "image/jpeg": "JPEG",
         "image/png": "PNG",
         "image/gif": "GIF",
         "image/webp": "WEBP",
     }
     try:
         from PIL import Image
         img = Image.open(BytesIO(file_data))
         # Image.format is None on images returned by resize()/convert(),
         # so remember the source format before deriving a new image.
         source_format = img.format
         w, h = img.size
         longest_edge = max(w, h)
         # Only resize if larger than the configured maximum dimension
         if longest_edge > MAX_IMAGE_DIMENSION:
             ratio = MAX_IMAGE_DIMENSION / longest_edge
             # Clamp to 1px so extreme aspect ratios never round a side to 0,
             # which resize() rejects.
             new_size = (max(1, int(w * ratio)), max(1, int(h * ratio)))
             img = img.resize(new_size, Image.LANCZOS)
         # JPEG cannot store alpha/palette data, so convert to RGB first
         out_type = content_type
         if img.mode in ("RGBA", "P") and content_type == "image/png":
             img = img.convert("RGB")
             out_type = "image/jpeg"
         # Pick the encoder from the media type we will report, so the bytes
         # and the returned media type can never disagree.
         save_format = save_formats.get(out_type) or source_format
         if save_format is None:
             save_format = "PNG"
             out_type = "image/png"
         buf = BytesIO()
         if save_format == "JPEG":
             img.save(buf, format="JPEG", quality=85, optimize=True)
         else:
             img.save(buf, format=save_format, optimize=True)
         result = buf.getvalue()
         # Only use the re-encoded version if it's actually smaller
         if len(result) < len(file_data):
             return result, out_type
         return file_data, content_type
     except ImportError:
         # Pillow not installed — send original (Claude auto-resizes)
         logger.debug("Pillow not available, sending original image to Claude")
         return file_data, content_type
     except Exception:
         # Best-effort: never block the chat on a bad image, but keep the
         # traceback so corrupt uploads can be diagnosed.
         logger.warning("Image resize failed, sending original", exc_info=True)
         return file_data, content_type
 async def fetch_upload_images(
     upload_ids: list[UUID],
     account_id: UUID,
     db: Any,
 ) -> list[dict[str, Any]]:
     """Load uploaded images and return them base64-encoded for a vision request.

     Only the first MAX_IMAGES_PER_MESSAGE ids are considered. Uploads are
     looked up scoped to ``account_id`` and filtered to ALLOWED_IMAGE_TYPES.
     Each image is passed through ``resize_image_for_vision`` before encoding.
     Images are returned in the order their ids appear in ``upload_ids``.

     Args:
         upload_ids: Ids of the uploads attached to the message.
         account_id: Account the uploads must belong to.
         db: Database session; must provide an awaitable ``execute()``.

     Returns:
         A list of ``{"media_type": str, "data": str}`` dicts, where ``data``
         is ASCII base64. Empty when there are no ids or
         ``settings.STORAGE_ENDPOINT`` is not set. Uploads that fail to
         download are logged and skipped rather than raising.
     """
     if not upload_ids or not settings.STORAGE_ENDPOINT:
         return []
     # Local imports, as in the original block.
     from sqlalchemy import select
     from app.models.file_upload import FileUpload
     # Cap the number of images to limit token cost
     capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
     if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
         logger.info(
             "Capped images from %d to %d for token budget",
             len(upload_ids), MAX_IMAGES_PER_MESSAGE,
         )
     # NOTE(review): confirm ALLOWED_IMAGE_TYPES only contains media types
     # the vision API accepts.
     result = await db.execute(
         select(FileUpload).where(
             FileUpload.id.in_(capped_ids),
             FileUpload.account_id == account_id,
             FileUpload.content_type.in_(ALLOWED_IMAGE_TYPES),
         )
     )
     # An IN (...) query carries no ordering guarantee; restore the order in
     # which the caller attached the images (first occurrence wins).
     position: dict[Any, int] = {}
     for index, upload_id in enumerate(capped_ids):
         position.setdefault(upload_id, index)
     uploads = sorted(
         result.scalars().all(),
         key=lambda row: position.get(row.id, len(position)),
     )
     images: list[dict[str, Any]] = []
     for upload in uploads:
         try:
             # NOTE(review): download_file is called without await; if it does
             # blocking S3 I/O it will stall the event loop — confirm.
             file_data = download_file(upload.storage_key)
             resized_data, media_type = resize_image_for_vision(
                 file_data, upload.content_type
             )
             images.append({
                 "media_type": media_type,
                 "data": base64.b64encode(resized_data).decode("ascii"),
             })
         except Exception:
             # Best-effort: one unreadable attachment should not fail the whole
             # message, but keep the traceback for diagnosis.
             logger.warning(
                 "Failed to fetch upload %s from S3", upload.id, exc_info=True
             )
     return images
--- a/frontend/src/api/assistantChat.ts
+++ b/frontend/src/api/assistantChat.ts
@@ -1,52 +1,13 @@
 import apiClient from './client'
-import type {
+import type { RetentionSettings } from '@/types/assistant-chat'
  AssistantChat,
  ChatListItem,
  ChatMessageResponse,
  RetentionSettings,
  ConcludeChatRequest,
  ConcludeChatResponse,
 } from '@/types/assistant-chat'
 /**
 * Chat retention settings API.
 *
 * Note: Chat CRUD methods were removed — the frontend uses aiSessionsApi
 * for all chat operations. Only retention settings remain on the /assistant prefix.
 */
 export const assistantChatApi = {
  /** POST an empty body to `/assistant/chats` and return the response payload. */
  async createChat(): Promise<AssistantChat> {
    const { data } = await apiClient.post<AssistantChat>('/assistant/chats', {})
    return data
  },
  /**
   * GET `/assistant/chats` with `page` and `size` query parameters
   * (defaults 1 and 20) and return the response payload.
   */
  async listChats(page = 1, size = 20): Promise<ChatListItem[]> {
    const params = { page, size }
    const { data } = await apiClient.get<ChatListItem[]>('/assistant/chats', { params })
    return data
  },
  /** GET `/assistant/chats/{chatId}` and return the response payload. */
  async getChat(chatId: string): Promise<AssistantChat> {
    const url = `/assistant/chats/${chatId}`
    const { data } = await apiClient.get<AssistantChat>(url)
    return data
  },
  /** POST `{ message }` to `/assistant/chats/{chatId}/messages` and return the response payload. */
  async sendMessage(chatId: string, message: string): Promise<ChatMessageResponse> {
    const url = `/assistant/chats/${chatId}/messages`
    const { data } = await apiClient.post<ChatMessageResponse>(url, { message })
    return data
  },
  /** PATCH `/assistant/chats/{chatId}` with the given title/pinned fields and return the response payload. */
  async updateChat(chatId: string, data: { title?: string; pinned?: boolean }): Promise<AssistantChat> {
    const url = `/assistant/chats/${chatId}`
    const { data: updated } = await apiClient.patch<AssistantChat>(url, data)
    return updated
  },
  /** DELETE `/assistant/chats/{chatId}`; the response body is ignored. */
  async deleteChat(chatId: string): Promise<void> {
    const url = `/assistant/chats/${chatId}`
    await apiClient.delete(url)
  },
  /** DELETE `/assistant/chats` with an `older_than_days` query parameter; the response body is ignored. */
  async bulkDeleteChats(olderThanDays: number): Promise<void> {
    const params = { older_than_days: olderThanDays }
    await apiClient.delete('/assistant/chats', { params })
  },
  async getRetentionSettings(): Promise<RetentionSettings> {
    const response = await apiClient.get<RetentionSettings>('/assistant/retention')
    return response.data
@@ -56,14 +17,6 @@ export const assistantChatApi = {
    const response = await apiClient.patch<RetentionSettings>('/assistant/retention', data)
    return response.data
  },
  /** POST the conclusion request to `/assistant/chats/{chatId}/conclude` and return the response payload. */
  async concludeChat(chatId: string, data: ConcludeChatRequest): Promise<ConcludeChatResponse> {
    const url = `/assistant/chats/${chatId}/conclude`
    const { data: conclusion } = await apiClient.post<ConcludeChatResponse>(url, data)
    return conclusion
  },
 }
 export default assistantChatApi
--- a/frontend/src/types/assistant-chat.ts
+++ b/frontend/src/types/assistant-chat.ts
@@ -1,20 +1,3 @@
 import type { SuggestedFlow } from './copilot'
 /** A single assistant chat together with its full message list. */
 export interface AssistantChat {
  id: string
  title: string
  messages: AssistantChatMessage[]
  message_count: number
  pinned: boolean
  // Timestamps are carried as strings; the exact format is not fixed by this type.
  created_at: string
  updated_at: string
 }
 /** One message in a chat: who authored it (`role`) and its text (`content`). */
 export interface AssistantChatMessage {
  role: 'user' | 'assistant'
  content: string
 }
 export interface ChatListItem {
  id: string
  title: string
@@ -24,27 +7,9 @@ export interface ChatListItem {
  updated_at: string
 }
 /** Reply payload for a sent message: the text content plus a list of suggested flows. */
 export interface ChatMessageResponse {
  content: string
  suggested_flows: SuggestedFlow[]
 }
 /**
 * Chat retention settings exchanged with the `/assistant/retention` endpoint.
 * Both limits are nullable; presumably `null` means "no limit" — confirm against the backend.
 */
 export interface RetentionSettings {
  chat_retention_days: number | null
  chat_retention_max_count: number | null
 }
 /** Closed set of outcome values used by `ConcludeChatRequest` and `ConcludeChatResponse`. */
 export type ConclusionOutcome = 'resolved' | 'escalated' | 'paused'
 /** Request body for concluding a chat: a required outcome and optional free-text notes. */
 export interface ConcludeChatRequest {
  outcome: ConclusionOutcome
  notes?: string
 }
 /** Response to a conclude request: a summary text, the outcome, and a conclusion timestamp string. */
 export interface ConcludeChatResponse {
  summary: string
  outcome: ConclusionOutcome
  concluded_at: string
 }
 export type { SuggestedFlow }
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -11,7 +11,7 @@ export type { Account, Subscription, PlanLimits, SubscriptionDetails, AccountInv
 export * from './admin'
 export * from './analytics'
 export * from './copilot'
-export type { AssistantChat, AssistantChatMessage, ChatListItem, ChatMessageResponse, RetentionSettings } from './assistant-chat'
+export type { ChatListItem, RetentionSettings, ConclusionOutcome } from './assistant-chat'
 export * from './ai-session'
 export * from './flow-proposal'
 export * from './flowpilot-analytics'