refactor: remove dead assistant_chat system, consolidate image helpers

The old /assistant/chats/* CRUD endpoints and assistant_chat_service chat functions were unused — the frontend exclusively uses /ai-sessions/{id}/chat (unified_chat_service) for all chat operations. Removed: - Chat CRUD endpoints (create, list, get, send, update, delete, bulk delete, conclude) - assistant_chat_service: create_chat, send_message, generate_conclusion_summary, CONCLUSION_SYSTEM_PROMPT - Frontend: assistantChatApi chat methods, dead types (AssistantChat, AssistantChatMessage, ConcludeChatRequest, etc.) Kept: - /assistant/retention endpoints (used by ChatRetentionSettingsPage) - Shared AI infrastructure (_call_ai, _call_anthropic_cached, ASSISTANT_SYSTEM_PROMPT, _auto_title) — imported by unified_chat_service Moved: - fetch_upload_images + resize_image_for_vision → storage_service.py (shared location, not tied to dead endpoint; the content-type filter now uses storage_service's ALLOWED_IMAGE_TYPES instead of the removed VISION_CONTENT_TYPES) Also added "Image Analysis" section to system prompt so Claude knows to describe attached screenshots. Net change: -650 lines (141 additions, 791 deletions). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 05:28:06 +00:00
parent 36ca830481
commit 8e7f13d2f8
8 changed files with 141 additions and 791 deletions
--- a/backend/app/api/endpoints/ai_sessions.py
+++ b/backend/app/api/endpoints/ai_sessions.py
@@ -283,8 +283,8 @@ async def send_chat_message(
    # Fetch attached images from S3 (if any)
    images = None
    if data.upload_ids:
-        from app.api.endpoints.assistant_chat import _fetch_upload_images
-        images = await _fetch_upload_images(data.upload_ids, account_id, db) or None
+        from app.services.storage_service import fetch_upload_images
+        images = await fetch_upload_images(data.upload_ids, account_id, db) or None

    try:
        ai_content, suggested_flows, session = await unified_chat_service.send_chat_message(
--- a/backend/app/api/endpoints/assistant_chat.py
+++ b/backend/app/api/endpoints/assistant_chat.py
@@ -1,453 +1,29 @@
-"""Standalone AI assistant chat endpoints.
+"""Chat retention settings endpoints.

-  POST   /assistant/chats              — Create new chat
-  GET    /assistant/chats              — List chats (paginated, newest first)
-  GET    /assistant/chats/{id}         — Get chat with messages
-  POST   /assistant/chats/{id}/messages — Send message
-  PATCH  /assistant/chats/{id}         — Update title, pin/unpin
-  DELETE /assistant/chats/{id}         — Delete single chat
-  DELETE /assistant/chats              — Bulk delete (older_than_days query param)
  GET    /assistant/retention          — Get account retention settings
  PATCH  /assistant/retention          — Update retention settings (owner only)
-"""
-import base64
-import logging
-from datetime import datetime, timezone, timedelta
-from typing import Annotated, Any, Optional
-from uuid import UUID

-from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
-from sqlalchemy import select, delete, func
+Note: Chat CRUD endpoints were removed — the frontend uses /ai-sessions/{id}/chat
+(unified_chat_service) for all chat operations. The /assistant prefix is kept for
+the retention settings to avoid a frontend URL change.
+"""
+from typing import Annotated, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, status
+from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.core.rate_limit import limiter
-from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
-from app.core.config import settings
-from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
+from app.api.deps import get_current_active_user, get_db
 from app.models.user import User
 from app.models.account import Account
-from app.models.assistant_chat import AssistantChat
-from app.models.file_upload import FileUpload
 from app.schemas.assistant_chat import (
-    ChatCreateRequest,
-    ChatMessageRequest,
-    ChatMessageResponse,
-    ChatListResponse,
-    ChatDetailResponse,
-    ChatUpdateRequest,
    RetentionSettingsResponse,
    RetentionSettingsUpdate,
-    ConcludeChatRequest,
-    ConcludeChatResponse,
 )
-from app.schemas.copilot import SuggestedFlow
-from app.services import assistant_chat_service
-
-logger = logging.getLogger(__name__)

 router = APIRouter(prefix="/assistant", tags=["assistant-chat"])


-VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
-
-# Claude vision costs: (width × height) / 750 tokens per image.
-# Claude auto-resizes images >1568px on the longest edge.
-# We resize server-side to avoid sending multi-MB base64 payloads over the wire.
-MAX_IMAGE_DIMENSION = 1568  # Claude's max efficient resolution
-MAX_IMAGES_PER_MESSAGE = 3  # Cap to control token budget
-
-
-def _resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
-    """Resize image to fit within Claude's efficient vision bounds.
-
-    Returns (resized_bytes, media_type). Converts PNG screenshots to JPEG
-    when it reduces size significantly (screenshots are often huge PNGs).
-    """
-    try:
-        from PIL import Image
-        from io import BytesIO
-
-        img = Image.open(BytesIO(file_data))
-        w, h = img.size
-
-        # Only resize if larger than Claude's max efficient dimension
-        if max(w, h) > MAX_IMAGE_DIMENSION:
-            ratio = MAX_IMAGE_DIMENSION / max(w, h)
-            new_w, new_h = int(w * ratio), int(h * ratio)
-            img = img.resize((new_w, new_h), Image.LANCZOS)
-
-        # Convert RGBA (common in screenshots) to RGB for JPEG
-        out_type = content_type
-        if img.mode in ("RGBA", "P") and content_type == "image/png":
-            img = img.convert("RGB")
-            out_type = "image/jpeg"
-
-        buf = BytesIO()
-        if out_type == "image/jpeg":
-            img.save(buf, format="JPEG", quality=85, optimize=True)
-        else:
-            img.save(buf, format=img.format or "PNG", optimize=True)
-
-        result = buf.getvalue()
-
-        # Only use resized version if it's actually smaller
-        if len(result) < len(file_data):
-            return result, out_type
-        return file_data, content_type
-
-    except ImportError:
-        # Pillow not installed — send original (Claude auto-resizes)
-        logger.debug("Pillow not available, sending original image to Claude")
-        return file_data, content_type
-    except Exception:
-        logger.warning("Image resize failed, sending original")
-        return file_data, content_type
-
-
-async def _fetch_upload_images(
-    upload_ids: list[UUID],
-    account_id: UUID,
-    db: AsyncSession,
-) -> list[dict[str, Any]]:
-    """Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision.
-
-    Resizes images server-side to reduce network payload and applies a per-message
-    cap to control token budget (~1,600 tokens per full-res image).
-    """
-    if not upload_ids or not settings.STORAGE_ENDPOINT:
-        return []
-
-    from app.services import storage_service
-
-    # Cap the number of images to limit token cost
-    capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
-    if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
-        logger.info(
-            "Capped images from %d to %d for token budget",
-            len(upload_ids), MAX_IMAGES_PER_MESSAGE,
-        )
-
-    result = await db.execute(
-        select(FileUpload).where(
-            FileUpload.id.in_(capped_ids),
-            FileUpload.account_id == account_id,
-            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
-        )
-    )
-    uploads = result.scalars().all()
-
-    images: list[dict[str, Any]] = []
-    for upload in uploads:
-        try:
-            file_data = storage_service.download_file(upload.storage_key)
-            resized_data, media_type = _resize_image_for_vision(
-                file_data, upload.content_type
-            )
-            images.append({
-                "media_type": media_type,
-                "data": base64.b64encode(resized_data).decode("ascii"),
-            })
-        except Exception:
-            logger.warning("Failed to fetch upload %s from S3", upload.id)
-    return images
-
-
-def _require_ai_enabled() -> None:
-    if not settings.ai_enabled:
-        raise HTTPException(
-            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-            detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
-        )
-
-
-@router.post("/chats", response_model=ChatDetailResponse, status_code=201)
-@limiter.limit("10/minute")
-async def create_chat(
-    request: Request,
-    data: ChatCreateRequest,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-    _: None = Depends(require_engineer_or_admin),
-):
-    """Create a new empty chat conversation."""
-    chat = await assistant_chat_service.create_chat(
-        user_id=current_user.id,
-        account_id=current_user.account_id,
-        db=db,
-    )
-    await db.commit()
-    return ChatDetailResponse.model_validate(chat)
-
-
-@router.get("/chats", response_model=list[ChatListResponse])
-async def list_chats(
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-    page: int = Query(1, ge=1),
-    size: int = Query(20, ge=1, le=100),
-):
-    """List user's chat conversations (newest first, pinned on top)."""
-    offset = (page - 1) * size
-    result = await db.execute(
-        select(AssistantChat)
-        .where(AssistantChat.user_id == current_user.id)
-        .order_by(AssistantChat.pinned.desc(), AssistantChat.updated_at.desc())
-        .offset(offset)
-        .limit(size)
-    )
-    chats = result.scalars().all()
-    return [ChatListResponse.model_validate(c) for c in chats]
-
-
-@router.get("/chats/{chat_id}", response_model=ChatDetailResponse)
-async def get_chat(
-    chat_id: UUID,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-):
-    """Get a chat with full message history."""
-    result = await db.execute(
-        select(AssistantChat).where(
-            AssistantChat.id == chat_id,
-            AssistantChat.user_id == current_user.id,
-        )
-    )
-    chat = result.scalar_one_or_none()
-    if not chat:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
-    return ChatDetailResponse.model_validate(chat)
-
-
-@router.post("/chats/{chat_id}/messages", response_model=ChatMessageResponse)
-@limiter.limit("10/minute")
-async def post_message(
-    request: Request,
-    chat_id: UUID,
-    data: ChatMessageRequest,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-    _: None = Depends(require_engineer_or_admin),
-):
-    """Send a message and get AI response."""
-    _require_ai_enabled()
-
-    allowed, quota_status = await check_ai_quota(
-        user_id=current_user.id,
-        account_id=current_user.account_id,
-        db=db,
-        billing_anchor=current_user.ai_billing_cycle_anchor_at,
-        is_super_admin=current_user.is_super_admin,
-    )
-    if not allowed:
-        reset_key = "daily_reset_at" if quota_status.get("deny_reason") == "daily" else "monthly_reset_at"
-        raise HTTPException(
-            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            detail={
-                "message": f"AI limit exceeded ({quota_status['deny_reason']})",
-                "reset_at": quota_status.get(reset_key),
-                "quota": quota_status,
-            },
-        )
-
-    plan = await get_user_plan(current_user.account_id, db)
-
-    # Capture scalar fields before the try block — after db.rollback()
-    # the ORM objects are expired and accessing attributes triggers a
-    # lazy load, which crashes in async context (MissingGreenlet).
-    user_id = current_user.id
-    account_id = current_user.account_id
-
-    # Fetch attached images from S3 (if any)
-    images = await _fetch_upload_images(data.upload_ids, account_id, db)
-
-    try:
-        ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
-            chat_id=chat_id,
-            user_id=user_id,
-            account_id=account_id,
-            message=data.message,
-            db=db,
-            images=images or None,
-        )
-    except ValueError as e:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
-    except Exception as e:
-        logger.exception("Assistant chat message failed: %s", e)
-        await db.rollback()
-        await record_ai_usage(
-            user_id=user_id,
-            account_id=account_id,
-            conversation_id=None,
-            generation_type="assistant_message",
-            tier=plan,
-            input_tokens=0,
-            output_tokens=0,
-            estimated_cost=0,
-            succeeded=False,
-            counts_toward_quota=False,
-            error_code=type(e).__name__,
-            extra_data={"assistant_chat_id": str(chat_id)},
-            db=db,
-        )
-        await db.commit()
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail=f"AI provider error ({type(e).__name__}). Please try again.",
-        )
-
-    await record_ai_usage(
-        user_id=user_id,
-        account_id=account_id,
-        conversation_id=None,
-        generation_type="assistant_message",
-        tier=plan,
-        input_tokens=chat.total_input_tokens,
-        output_tokens=chat.total_output_tokens,
-        estimated_cost=(
-            chat.total_input_tokens * 1.0 / 1_000_000
-            + chat.total_output_tokens * 5.0 / 1_000_000
-        ),
-        succeeded=True,
-        counts_toward_quota=False,
-        error_code=None,
-        extra_data={"assistant_chat_id": str(chat_id)},
-        db=db,
-    )
-    await db.commit()
-
-    return ChatMessageResponse(
-        content=ai_content,
-        suggested_flows=[SuggestedFlow.model_validate(sf) for sf in suggested_flows],
-    )
-
-
-@router.post("/chats/{chat_id}/conclude", response_model=ConcludeChatResponse)
-@limiter.limit("10/minute")
-async def conclude_chat(
-    request: Request,
-    chat_id: UUID,
-    data: ConcludeChatRequest,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-    _: None = Depends(require_engineer_or_admin),
-):
-    """Conclude a chat session and generate ticket-ready summary."""
-    _require_ai_enabled()
-
-    result = await db.execute(
-        select(AssistantChat).where(
-            AssistantChat.id == chat_id,
-            AssistantChat.user_id == current_user.id,
-        )
-    )
-    chat = result.scalar_one_or_none()
-    if not chat:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
-
-    if chat.concluded_at:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Chat already concluded",
-        )
-
-    if chat.message_count < 2:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Chat must have at least one exchange before concluding",
-        )
-
-    try:
-        summary = await assistant_chat_service.generate_conclusion_summary(
-            chat=chat,
-            outcome=data.outcome,
-            notes=data.notes,
-        )
-    except Exception as e:
-        logger.exception("Failed to generate conclusion summary: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_502_BAD_GATEWAY,
-            detail="Failed to generate summary. Please try again.",
-        )
-
-    now = datetime.now(timezone.utc)
-    chat.conclusion_outcome = data.outcome
-    chat.conclusion_summary = summary
-    chat.concluded_at = now
-    await db.commit()
-
-    return ConcludeChatResponse(
-        summary=summary,
-        outcome=data.outcome,
-        concluded_at=now,
-    )
-
-
-@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse)
-async def update_chat(
-    chat_id: UUID,
-    data: ChatUpdateRequest,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-):
-    """Update chat title or pin/unpin."""
-    result = await db.execute(
-        select(AssistantChat).where(
-            AssistantChat.id == chat_id,
-            AssistantChat.user_id == current_user.id,
-        )
-    )
-    chat = result.scalar_one_or_none()
-    if not chat:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
-
-    if data.title is not None:
-        chat.title = data.title
-    if data.pinned is not None:
-        chat.pinned = data.pinned
-
-    await db.commit()
-    return ChatDetailResponse.model_validate(chat)
-
-
-@router.delete("/chats/{chat_id}", status_code=204)
-async def delete_chat(
-    chat_id: UUID,
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-):
-    """Delete a single chat."""
-    result = await db.execute(
-        select(AssistantChat).where(
-            AssistantChat.id == chat_id,
-            AssistantChat.user_id == current_user.id,
-        )
-    )
-    chat = result.scalar_one_or_none()
-    if not chat:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
-
-    await db.delete(chat)
-    await db.commit()
-
-
-@router.delete("/chats", status_code=204)
-async def bulk_delete_chats(
-    current_user: Annotated[User, Depends(get_current_active_user)],
-    db: Annotated[AsyncSession, Depends(get_db)],
-    older_than_days: int = Query(..., ge=1),
-):
-    """Bulk delete chats older than N days (skips pinned)."""
-    cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
-    await db.execute(
-        delete(AssistantChat).where(
-            AssistantChat.user_id == current_user.id,
-            AssistantChat.pinned == False,  # noqa: E712
-            AssistantChat.updated_at < cutoff,
-        )
-    )
-    await db.commit()
-
-
@router.get("/retention", response_model=RetentionSettingsResponse)
 async def get_retention_settings(
    current_user: Annotated[User, Depends(get_current_active_user)],
--- a/backend/app/schemas/assistant_chat.py
+++ b/backend/app/schemas/assistant_chat.py
@@ -1,54 +1,11 @@
-"""Pydantic schemas for standalone AI assistant chat."""
-from typing import Optional, Any, Literal
-from uuid import UUID
-from datetime import datetime
+"""Pydantic schemas for chat retention settings.
+
+Chat CRUD schemas were removed — the active chat system uses
+schemas from ai_session.py via the /ai-sessions endpoints.
+"""
+from typing import Optional
 from pydantic import BaseModel, Field

-from app.schemas.copilot import SuggestedFlow
-
-
-class ChatCreateRequest(BaseModel):
-    """Empty body — creates a new blank conversation."""
-    pass
-
-
-class ChatMessageRequest(BaseModel):
-    message: str = Field(..., min_length=1, max_length=8000)
-    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)
-
-
-class ChatMessageResponse(BaseModel):
-    content: str
-    suggested_flows: list[SuggestedFlow] = []
-
-
-class ChatListResponse(BaseModel):
-    id: UUID
-    title: str
-    message_count: int
-    pinned: bool
-    created_at: datetime
-    updated_at: datetime
-
-    model_config = {"from_attributes": True}
-
-
-class ChatDetailResponse(BaseModel):
-    id: UUID
-    title: str
-    messages: list[dict[str, Any]]
-    message_count: int
-    pinned: bool
-    created_at: datetime
-    updated_at: datetime
-
-    model_config = {"from_attributes": True}
-
-
-class ChatUpdateRequest(BaseModel):
-    title: Optional[str] = Field(None, min_length=1, max_length=255)
-    pinned: Optional[bool] = None
-

 class RetentionSettingsResponse(BaseModel):
    chat_retention_days: Optional[int]
@@ -58,14 +15,3 @@ class RetentionSettingsResponse(BaseModel):
 class RetentionSettingsUpdate(BaseModel):
    chat_retention_days: Optional[int] = Field(None, ge=1, le=365)
    chat_retention_max_count: Optional[int] = Field(None, ge=10, le=10000)
-
-
-class ConcludeChatRequest(BaseModel):
-    outcome: Literal["resolved", "escalated", "paused"]
-    notes: Optional[str] = Field(None, max_length=2000)
-
-
-class ConcludeChatResponse(BaseModel):
-    summary: str
-    outcome: str
-    concluded_at: datetime
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -1,7 +1,7 @@
-"""Standalone AI assistant chat service with RAG context.
+"""Shared AI chat infrastructure — system prompt, prompt caching, and AI calling.

-Provides persistent conversation history for general IT questions
-with semantic search over the team's flow library.
+Used by unified_chat_service (the active chat backend). The assistant_chat
+CRUD endpoints were removed — only retention settings remain on that router.

 Uses Anthropic prompt caching to reduce cost on multi-turn conversations:
 - The static system prompt is cached (ephemeral, 5-min TTL)
@@ -13,14 +13,8 @@ for real-time documentation lookups (controlled by ENABLE_MCP_MICROSOFT_LEARN).
 """
 import logging
 from typing import Any
-from uuid import UUID
-
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.config import settings
-from app.models.assistant_chat import AssistantChat
-from app.services.rag_service import search as rag_search, build_rag_context, extract_suggested_flows

 logger = logging.getLogger(__name__)

@@ -74,6 +68,11 @@ You have access to Microsoft's official documentation via Microsoft Learn. Use i
 - No team flow covers the topic and vendor-specific detail would help
 Do NOT use Microsoft Learn for every question — only when official docs add real value.

+## Image Analysis
+When an image is attached, analyze it carefully. Screenshots of error messages, \
+config panels, event viewer logs, and network diagrams are common in MSP work. \
+Describe what you see and use the visual information to inform your troubleshooting advice.
+
 ## Boundaries
 - Stay focused on IT infrastructure, systems administration, and MSP operations.
 - If a question is clearly outside your domain, say so briefly and redirect.
@@ -273,199 +272,3 @@ def _auto_title(message: str) -> str:
    if len(message) > 100:
        title = title.rsplit(" ", 1)[0] + "..."
    return title
-
-
-CONCLUSION_SYSTEM_PROMPT = """\
-You are a ticket documentation specialist for MSP (Managed Service Provider) teams. \
-Your job is to transform an AI troubleshooting conversation into clean, professional \
-ticket notes that can be pasted directly into a PSA/ticketing system (ConnectWise, \
-Autotask, HaloPSA, etc.).
-
-## Output Format
-
-Generate a structured summary using this exact format:
-
-**Subject:** [One-line summary of the issue]
-
-**Outcome:** {outcome_label}
-
-**Problem Description:**
-[2-3 sentence summary of the original problem]
-
-**Steps Taken:**
-1. [Step] — [Result/finding]
-2. [Step] — [Result/finding]
-(list all troubleshooting steps from the conversation)
-
-**Current Status:**
-[Where things stand now — what was resolved, what remains]
-
-{notes_section}
-
-**Key Findings:**
- [Important discovery or configuration detail]
- [Any relevant error codes, settings, or values identified]
-
-{resume_section}
-
-## Rules
- Be concise but thorough — these notes will be read by another engineer
- Include specific technical details (commands run, error messages, config values)
- Use plain text formatting (no HTML) — bold with ** is fine
- Do NOT include conversational filler, greetings, or meta-commentary
- Extract ALL actionable steps from the conversation, in chronological order
- If the conversation identified root cause, state it clearly
-"""
-
-
-async def generate_conclusion_summary(
-    chat: "AssistantChat",
-    outcome: str,
-    notes: str | None = None,
-) -> str:
-    """Generate a ticket-ready summary from a concluded chat conversation."""
-    outcome_labels = {
-        "resolved": "Resolved",
-        "escalated": "Escalated",
-        "paused": "Paused — To Be Continued",
-    }
-    outcome_label = outcome_labels.get(outcome, outcome)
-
-    notes_section = ""
-    if notes:
-        notes_section = f"\n**Engineer Notes:**\n{notes}\n"
-
-    resume_section = ""
-    if outcome == "paused":
-        resume_section = (
-            "\n**Next Steps (for resumption):**\n"
-            "- [What needs to happen next]\n"
-            "- [Any pending actions or follow-ups]\n"
-        )
-    elif outcome == "escalated":
-        resume_section = (
-            "\n**Escalation Details:**\n"
-            "- [Reason for escalation]\n"
-            "- [Recommended next steps for receiving team/tier]\n"
-        )
-
-    # Build the conversation transcript for the AI
-    transcript_lines = []
-    for msg in chat.messages:
-        role_label = "ENGINEER" if msg["role"] == "user" else "AI ASSISTANT"
-        transcript_lines.append(f"[{role_label}]: {msg['content']}")
-
-    transcript = "\n\n".join(transcript_lines)
-
-    prompt = (
-        f"Outcome: {outcome_label}\n\n"
-        f"{'Engineer Notes: ' + notes if notes else '(No additional notes)'}\n\n"
-        f"--- CONVERSATION TRANSCRIPT ---\n\n{transcript}\n\n"
-        f"--- END TRANSCRIPT ---\n\n"
-        f"Generate the ticket notes now. Replace all placeholder brackets with actual content from the conversation. "
-        f"The notes_section placeholder should be: {notes_section or '(omit this section)'}\n"
-        f"The resume_section placeholder should be filled based on the conversation context."
-    )
-
-    system_with_vars = CONCLUSION_SYSTEM_PROMPT.replace(
-        "{outcome_label}", outcome_label
-    ).replace(
-        "{notes_section}", notes_section or ""
-    ).replace(
-        "{resume_section}", resume_section
-    )
-
-    content, _, _ = await _call_ai(
-        system_base=system_with_vars,
-        rag_context="",
-        history=[],
-        new_message=prompt,
-        max_tokens=2048,
-    )
-
-    return content
-
-
-async def create_chat(
-    user_id: UUID,
-    account_id: UUID,
-    db: AsyncSession,
-) -> AssistantChat:
-    """Create a new empty chat."""
-    chat = AssistantChat(
-        user_id=user_id,
-        account_id=account_id,
-        messages=[],
-    )
-    db.add(chat)
-    await db.flush()
-    return chat
-
-
-async def send_message(
-    chat_id: UUID,
-    user_id: UUID,
-    account_id: UUID,
-    message: str,
-    db: AsyncSession,
-    images: list[dict[str, Any]] | None = None,
-) -> tuple[str, list[dict[str, Any]], AssistantChat]:
-    """Send a user message and get AI response.
-
-    Args:
-        images: Optional list of {"media_type": str, "data": str (base64)}
-                for vision content attached to this message.
-
-    Returns (ai_content, suggested_flows, chat).
-    """
-    result = await db.execute(
-        select(AssistantChat).where(
-            AssistantChat.id == chat_id,
-            AssistantChat.user_id == user_id,
-        )
-    )
-    chat = result.scalar_one_or_none()
-    if not chat:
-        raise ValueError("Chat not found")
-
-    # Auto-title from first message
-    if chat.message_count == 0:
-        chat.title = _auto_title(message)
-
-    # RAG search
-    rag_results = await rag_search(
-        query=message,
-        account_id=account_id,
-        db=db,
-        limit=8,
-    )
-
-    rag_context = build_rag_context(rag_results)
-
-    # Build messages for AI
-    ai_messages: list[dict[str, Any]] = []
-    for msg in chat.messages:
-        if msg["role"] in ("user", "assistant"):
-            ai_messages.append({"role": msg["role"], "content": msg["content"]})
-
-    # Call AI with prompt caching (Anthropic) or generic provider
-    ai_content, input_tokens, output_tokens = await _call_ai(
-        system_base=ASSISTANT_SYSTEM_PROMPT,
-        rag_context=rag_context,
-        history=ai_messages,
-        new_message=message,
-        images=images,
-    )
-
-    # Update chat
-    msgs = list(chat.messages)
-    msgs.append({"role": "user", "content": message})
-    msgs.append({"role": "assistant", "content": ai_content})
-    chat.messages = msgs
-    chat.message_count += 2
-    chat.total_input_tokens += input_tokens
-    chat.total_output_tokens += output_tokens
-
-    suggested_flows = extract_suggested_flows(rag_results)
-
-    return ai_content, suggested_flows, chat
--- a/backend/app/services/storage_service.py
+++ b/backend/app/services/storage_service.py
@@ -1,7 +1,10 @@
 """S3-compatible object storage service for file uploads."""
+import base64
 import logging
 import uuid
 from io import BytesIO
+from typing import Any
+from uuid import UUID

 import boto3
 from botocore.config import Config as BotoConfig
@@ -92,3 +95,107 @@ async def delete_file(storage_key: str) -> None:
        client.delete_object(Bucket=settings.STORAGE_BUCKET_NAME, Key=storage_key)
    except ClientError:
        logger.warning(f"Failed to delete S3 object: {storage_key}")
+
+
+# ── Vision helpers (resize + fetch for AI) ─────────────────────
+
+# Claude vision costs: (width × height) / 750 tokens per image.
+# Claude auto-resizes images >1568px on the longest edge.
+# We resize server-side to avoid sending multi-MB base64 payloads over the wire.
+MAX_IMAGE_DIMENSION = 1568  # Claude's max efficient resolution
+MAX_IMAGES_PER_MESSAGE = 3  # Cap to control token budget
+
+
+def resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]:
+    """Shrink an image toward Claude's efficient vision bounds (best effort).
+
+    ``file_data`` is the encoded image and ``content_type`` its recorded MIME type.
+    Downscales when the longest edge exceeds MAX_IMAGE_DIMENSION and re-encodes
+    RGBA/palette PNGs as JPEG; other non-JPEG content keeps its source format.
+
+    Returns ``(bytes, media_type)``: the re-encoded image when that is smaller than
+    the input, otherwise the input unchanged (also on any failure or missing Pillow).
+    """
+    try:
+        from PIL import Image
+
+        img = Image.open(BytesIO(file_data))
+        # resize()/convert() return images whose .format is None; remember it now so
+        # a resized GIF/WebP is not written as PNG under its original media type.
+        src_format = img.format
+        w, h = img.size
+        if max(w, h) > MAX_IMAGE_DIMENSION:
+            ratio = MAX_IMAGE_DIMENSION / max(w, h)
+            # Clamp to 1px: an extreme aspect ratio would otherwise round down to 0.
+            img = img.resize((max(1, int(w * ratio)), max(1, int(h * ratio))), Image.LANCZOS)
+
+        out_type = content_type
+        if img.mode in ("RGBA", "P") and content_type == "image/png":
+            img = img.convert("RGB")
+            out_type = "image/jpeg"
+
+        buf = BytesIO()
+        if out_type == "image/jpeg":
+            img.save(buf, format="JPEG", quality=85, optimize=True)
+        else:
+            img.save(buf, format=src_format or "PNG", optimize=True)
+        result = buf.getvalue()
+        # Only use the re-encoded version if it's actually smaller
+        if len(result) < len(file_data):
+            return result, out_type
+        return file_data, content_type
+    except ImportError:
+        logger.debug("Pillow not available, sending original image to Claude")
+        return file_data, content_type
+    except Exception:
+        logger.warning("Image resize failed, sending original", exc_info=True)
+        return file_data, content_type
+
+
+async def fetch_upload_images(
+    upload_ids: list[UUID],
+    account_id: UUID,
+    db: Any,
+) -> list[dict[str, Any]]:
+    """Fetch uploaded images from S3 as base64-encoded dicts for Claude vision.
+
+    Only the first MAX_IMAGES_PER_MESSAGE ids are used (token budget), restricted to
+    uploads owned by ``account_id`` with a content type in ALLOWED_IMAGE_TYPES; each
+    image goes through resize_image_for_vision. ``db`` must support ``await db.execute(...)``.
+
+    Returns ``{"media_type", "data"}`` dicts (data is base64 text) in ``upload_ids``
+    order; uploads that fail to download or encode are logged and skipped.
+    """
+    if not upload_ids or not settings.STORAGE_ENDPOINT:
+        return []
+
+    from sqlalchemy import select
+    from app.models.file_upload import FileUpload
+
+    capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE]
+    if len(upload_ids) > MAX_IMAGES_PER_MESSAGE:
+        logger.info("Capped images from %d to %d for token budget",
+                    len(upload_ids), MAX_IMAGES_PER_MESSAGE)
+
+    result = await db.execute(
+        select(FileUpload).where(
+            FileUpload.id.in_(capped_ids),
+            FileUpload.account_id == account_id,
+            FileUpload.content_type.in_(ALLOWED_IMAGE_TYPES),
+        )
+    )
+    # IN (...) gives no ordering guarantee; restore the caller's attachment order.
+    rank = {upload_id: pos for pos, upload_id in enumerate(capped_ids)}
+    uploads = sorted(result.scalars().all(), key=lambda u: rank.get(u.id, len(rank)))
+
+    images: list[dict[str, Any]] = []
+    for upload in uploads:
+        try:
+            # NOTE(review): called without await — confirm download_file is synchronous.
+            raw = download_file(upload.storage_key)
+            data, media_type = resize_image_for_vision(raw, upload.content_type)
+            encoded = base64.b64encode(data).decode("ascii")
+            images.append({"media_type": media_type, "data": encoded})
+        except Exception:
+            logger.warning("Failed to fetch upload %s from S3", upload.id, exc_info=True)
+    return images
--- a/frontend/src/api/assistantChat.ts
+++ b/frontend/src/api/assistantChat.ts
@@ -1,52 +1,13 @@
 import apiClient from './client'
-import type {
-  AssistantChat,
-  ChatListItem,
-  ChatMessageResponse,
-  RetentionSettings,
-  ConcludeChatRequest,
-  ConcludeChatResponse,
-} from '@/types/assistant-chat'
+import type { RetentionSettings } from '@/types/assistant-chat'

+/**
+ * Chat retention settings API.
+ *
+ * Note: Chat CRUD methods were removed — the frontend uses aiSessionsApi
+ * for all chat operations. Only retention settings remain on the /assistant prefix.
+ */
 export const assistantChatApi = {
-  async createChat(): Promise<AssistantChat> {
-    const response = await apiClient.post<AssistantChat>('/assistant/chats', {})
-    return response.data
-  },
-
-  async listChats(page = 1, size = 20): Promise<ChatListItem[]> {
-    const response = await apiClient.get<ChatListItem[]>('/assistant/chats', {
-      params: { page, size },
-    })
-    return response.data
-  },
-
-  async getChat(chatId: string): Promise<AssistantChat> {
-    const response = await apiClient.get<AssistantChat>(`/assistant/chats/${chatId}`)
-    return response.data
-  },
-
-  async sendMessage(chatId: string, message: string): Promise<ChatMessageResponse> {
-    const response = await apiClient.post<ChatMessageResponse>(
-      `/assistant/chats/${chatId}/messages`,
-      { message }
-    )
-    return response.data
-  },
-
-  async updateChat(chatId: string, data: { title?: string; pinned?: boolean }): Promise<AssistantChat> {
-    const response = await apiClient.patch<AssistantChat>(`/assistant/chats/${chatId}`, data)
-    return response.data
-  },
-
-  async deleteChat(chatId: string): Promise<void> {
-    await apiClient.delete(`/assistant/chats/${chatId}`)
-  },
-
-  async bulkDeleteChats(olderThanDays: number): Promise<void> {
-    await apiClient.delete('/assistant/chats', { params: { older_than_days: olderThanDays } })
-  },
-
  async getRetentionSettings(): Promise<RetentionSettings> {
    const response = await apiClient.get<RetentionSettings>('/assistant/retention')
    return response.data
@@ -56,14 +17,6 @@ export const assistantChatApi = {
    const response = await apiClient.patch<RetentionSettings>('/assistant/retention', data)
    return response.data
  },
-
-  async concludeChat(chatId: string, data: ConcludeChatRequest): Promise<ConcludeChatResponse> {
-    const response = await apiClient.post<ConcludeChatResponse>(
-      `/assistant/chats/${chatId}/conclude`,
-      data
-    )
-    return response.data
-  },
 }

 export default assistantChatApi
--- a/frontend/src/types/assistant-chat.ts
+++ b/frontend/src/types/assistant-chat.ts
@@ -1,20 +1,3 @@
-import type { SuggestedFlow } from './copilot'
-
-export interface AssistantChat {
-  id: string
-  title: string
-  messages: AssistantChatMessage[]
-  message_count: number
-  pinned: boolean
-  created_at: string
-  updated_at: string
-}
-
-export interface AssistantChatMessage {
-  role: 'user' | 'assistant'
-  content: string
-}
-
 export interface ChatListItem {
  id: string
  title: string
@@ -24,27 +7,9 @@ export interface ChatListItem {
  updated_at: string
 }

-export interface ChatMessageResponse {
-  content: string
-  suggested_flows: SuggestedFlow[]
-}
-
 export interface RetentionSettings {
  chat_retention_days: number | null
  chat_retention_max_count: number | null
 }

 export type ConclusionOutcome = 'resolved' | 'escalated' | 'paused'
-
-export interface ConcludeChatRequest {
-  outcome: ConclusionOutcome
-  notes?: string
-}
-
-export interface ConcludeChatResponse {
-  summary: string
-  outcome: ConclusionOutcome
-  concluded_at: string
-}
-
-export type { SuggestedFlow }
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -11,7 +11,7 @@ export type { Account, Subscription, PlanLimits, SubscriptionDetails, AccountInv
 export * from './admin'
 export * from './analytics'
 export * from './copilot'
-export type { AssistantChat, AssistantChatMessage, ChatListItem, ChatMessageResponse, RetentionSettings } from './assistant-chat'
+export type { ChatListItem, RetentionSettings, ConclusionOutcome } from './assistant-chat'
 export * from './ai-session'
 export * from './flow-proposal'
 export * from './flowpilot-analytics'