feat: wire image uploads into AI assistant chat (vision support)

- Backend: ChatMessageRequest accepts upload_ids; the endpoint fetches the images from S3, base64-encodes them, and passes them to Claude as multimodal content blocks (vision API)
- Backend: add download_file() to storage_service for fetching from S3
- Frontend: handleSend collects completed upload IDs from pendingUploads and includes them in the sendChatMessage API call
- Frontend: prefill handler passes upload IDs from dashboard nav state
- Enables paste-screenshot → AI-sees-it flow end-to-end

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 04:39:54 +00:00
parent 48f2b3faaf
commit 3b682069d3
6 changed files with 99 additions and 7 deletions
--- a/backend/app/api/endpoints/assistant_chat.py
+++ b/backend/app/api/endpoints/assistant_chat.py
@@ -10,9 +10,10 @@
  GET    /assistant/retention          — Get account retention settings
  PATCH  /assistant/retention          — Update retention settings (owner only)
 """
+import base64
 import logging
 from datetime import datetime, timezone, timedelta
-from typing import Annotated, Optional
+from typing import Annotated, Any, Optional
 from uuid import UUID

 from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
@@ -26,6 +27,7 @@ from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_
 from app.models.user import User
 from app.models.account import Account
 from app.models.assistant_chat import AssistantChat
+from app.models.file_upload import FileUpload
 from app.schemas.assistant_chat import (
    ChatCreateRequest,
    ChatMessageRequest,
@@ -46,6 +48,42 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/assistant", tags=["assistant-chat"])


+VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
+
+
+async def _fetch_upload_images(
+    upload_ids: list[UUID],
+    account_id: UUID,
+    db: AsyncSession,
+) -> list[dict[str, Any]]:
+    """Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision.
+
+    Only uploads of ``account_id`` with a vision content type are read; failures are skipped.
+    """
+    if not upload_ids or not settings.STORAGE_ENDPOINT:
+        return []
+
+    from app.services import storage_service
+
+    result = await db.execute(
+        select(FileUpload).where(
+            FileUpload.id.in_(upload_ids),
+            FileUpload.account_id == account_id,
+            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
+        )
+    )
+    images: list[dict[str, Any]] = []
+    for upload in result.scalars().all():
+        try:
+            # NOTE(review): called without await — if this blocks on I/O it stalls the event loop.
+            file_data = storage_service.download_file(upload.storage_key)
+            encoded = base64.b64encode(file_data).decode("ascii")
+            images.append({"media_type": upload.content_type, "data": encoded})
+        except Exception:
+            logger.warning("Failed to fetch upload %s from S3", upload.id, exc_info=True)
+    return images
+
+
 def _require_ai_enabled() -> None:
    if not settings.ai_enabled:
        raise HTTPException(
@@ -151,6 +189,9 @@ async def post_message(
    user_id = current_user.id
    account_id = current_user.account_id

+    # Fetch attached images from S3 (if any)
+    images = await _fetch_upload_images(data.upload_ids, account_id, db)
+
    try:
        ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
            chat_id=chat_id,
@@ -158,6 +199,7 @@ async def post_message(
            account_id=account_id,
            message=data.message,
            db=db,
+            images=images or None,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))