feat: wire image uploads into AI assistant chat (vision support)

- Backend: ChatMessageRequest accepts upload_ids, endpoint fetches
  images from S3, base64-encodes them, passes to Claude as multimodal
  content blocks (vision API)
- Backend: add download_file() to storage_service for fetching from S3
- Frontend: handleSend collects completed upload IDs from pendingUploads
  and includes them in the sendChatMessage API call
- Frontend: prefill handler passes upload IDs from dashboard nav state
- Enables paste-screenshot → AI-sees-it flow end-to-end

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
chihlasm
2026-03-24 04:39:54 +00:00
parent 48f2b3faaf
commit 3b682069d3
6 changed files with 99 additions and 7 deletions

View File

@@ -10,9 +10,10 @@
GET /assistant/retention — Get account retention settings
PATCH /assistant/retention — Update retention settings (owner only)
"""
import base64
import logging
from datetime import datetime, timezone, timedelta
from typing import Annotated, Optional
from typing import Annotated, Any, Optional
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
@@ -26,6 +27,7 @@ from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_
from app.models.user import User
from app.models.account import Account
from app.models.assistant_chat import AssistantChat
from app.models.file_upload import FileUpload
from app.schemas.assistant_chat import (
ChatCreateRequest,
ChatMessageRequest,
@@ -46,6 +48,42 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
async def _fetch_upload_images(
    upload_ids: list[UUID],
    account_id: UUID,
    db: AsyncSession,
) -> list[dict[str, Any]]:
    """Fetch uploaded images from S3 as base64 payloads for Claude vision.

    Only uploads that belong to ``account_id`` and whose content type is in
    ``VISION_CONTENT_TYPES`` are returned; any other requested ID is dropped
    by the query. Downloads are best-effort: an image that cannot be fetched
    is logged and skipped rather than failing the whole chat message.

    Args:
        upload_ids: IDs of the uploads attached to the message.
        account_id: Account the uploads must belong to (tenant isolation).
        db: Async database session used to look up the upload rows.

    Returns:
        A list of ``{"media_type": str, "data": str}`` dicts, where ``data``
        is the base64-encoded file content. Images follow the order of
        ``upload_ids``. The list is empty when no IDs are given or storage
        is not configured.
    """
    if not upload_ids or not settings.STORAGE_ENDPOINT:
        return []

    import asyncio

    from app.services import storage_service

    result = await db.execute(
        select(FileUpload).where(
            FileUpload.id.in_(upload_ids),
            FileUpload.account_id == account_id,
            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
        )
    )

    # The query returns rows in arbitrary order; restore the order in which
    # the caller attached them. IDs are compared as strings so this works
    # whether the column yields UUID objects or their string form, and rows
    # that somehow do not match simply sort last instead of being lost.
    position = {str(uid): idx for idx, uid in enumerate(upload_ids)}
    uploads = sorted(
        result.scalars().all(),
        key=lambda upload: position.get(str(upload.id), len(position)),
    )

    images: list[dict[str, Any]] = []
    for upload in uploads:
        try:
            # download_file is a blocking S3 call; run it in a worker thread
            # so the event loop keeps serving other requests meanwhile.
            file_data = await asyncio.to_thread(
                storage_service.download_file, upload.storage_key
            )
            images.append({
                "media_type": upload.content_type,
                "data": base64.b64encode(file_data).decode("ascii"),
            })
        except Exception:
            # Best-effort: keep going, but record the traceback so the
            # failure can be diagnosed.
            logger.warning(
                "Failed to fetch upload %s from S3", upload.id, exc_info=True
            )
    return images
def _require_ai_enabled() -> None:
if not settings.ai_enabled:
raise HTTPException(
@@ -151,6 +189,9 @@ async def post_message(
user_id = current_user.id
account_id = current_user.account_id
# Fetch attached images from S3 (if any)
images = await _fetch_upload_images(data.upload_ids, account_id, db)
try:
ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
chat_id=chat_id,
@@ -158,6 +199,7 @@ async def post_message(
account_id=account_id,
message=data.message,
db=db,
images=images or None,
)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))

View File

@@ -14,6 +14,7 @@ class ChatCreateRequest(BaseModel):
class ChatMessageRequest(BaseModel):
    """Request body for posting a user message to an assistant chat."""

    # Text of the user's message; required, between 1 and 8000 characters.
    message: str = Field(..., min_length=1, max_length=8000)
    # IDs of uploads to attach to the message; optional (defaults to an
    # empty list) and limited to 10 entries.
    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)
class ChatMessageResponse(BaseModel):

View File

@@ -87,6 +87,7 @@ async def _call_ai(
history: list[dict[str, Any]],
new_message: str,
max_tokens: int = 4096,
images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
"""Call the AI with prompt caching when using Anthropic.
@@ -95,13 +96,18 @@ async def _call_ai(
- RAG context: NOT cached (changes per query)
- Conversation history prefix: cached via breakpoint on last
existing message (stable — only new user message is uncached)
Args:
images: Optional list of {"media_type": str, "data": str (base64)}
to include alongside the new_message as vision content.
"""
if settings.AI_PROVIDER == "anthropic" and settings.ANTHROPIC_API_KEY:
return await _call_anthropic_cached(
system_base, rag_context, history, new_message, max_tokens
system_base, rag_context, history, new_message, max_tokens,
images=images,
)
# Fallback: generic provider (Gemini, etc.)
# Fallback: generic provider (Gemini, etc.) — images not supported
from app.core.ai_provider import get_ai_provider
system_prompt = system_base + rag_context
@@ -120,6 +126,7 @@ async def _call_anthropic_cached(
history: list[dict[str, Any]],
new_message: str,
max_tokens: int,
images: list[dict[str, Any]] | None = None,
) -> tuple[str, int, int]:
"""Call Anthropic with prompt caching on system prompt and history.
@@ -168,7 +175,22 @@ async def _call_anthropic_cached(
}
# Add the new user message (uncached — it's new each turn)
messages.append({"role": "user", "content": new_message})
# If images are attached, build multimodal content blocks
if images:
content_blocks: list[dict[str, Any]] = []
for img in images:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": img["media_type"],
"data": img["data"],
},
})
content_blocks.append({"type": "text", "text": new_message})
messages.append({"role": "user", "content": content_blocks})
else:
messages.append({"role": "user", "content": new_message})
# MCP server config (optional — controlled by settings)
mcp_servers = anthropic.NOT_GIVEN
@@ -386,9 +408,14 @@ async def send_message(
account_id: UUID,
message: str,
db: AsyncSession,
images: list[dict[str, Any]] | None = None,
) -> tuple[str, list[dict[str, Any]], AssistantChat]:
"""Send a user message and get AI response.
Args:
images: Optional list of {"media_type": str, "data": str (base64)}
for vision content attached to this message.
Returns (ai_content, suggested_flows, chat).
"""
result = await db.execute(
@@ -427,6 +454,7 @@ async def send_message(
rag_context=rag_context,
history=ai_messages,
new_message=message,
images=images,
)
# Update chat

View File

@@ -67,6 +67,14 @@ async def upload_file(
return storage_key
def download_file(storage_key: str) -> bytes:
    """Fetch the object stored under ``storage_key`` and return its raw bytes.

    The object is read from the bucket named by
    ``settings.STORAGE_BUCKET_NAME`` into an in-memory buffer, so the whole
    file is held in memory at once.
    """
    s3 = _get_client()
    with BytesIO() as buffer:
        s3.download_fileobj(settings.STORAGE_BUCKET_NAME, storage_key, buffer)
        contents = buffer.getvalue()
    return contents
def get_presigned_url(storage_key: str) -> str:
"""Generate a time-limited presigned URL for downloading a file."""
client = _get_client()

View File

@@ -55,7 +55,9 @@ export default function AssistantChatPage() {
// Handle prefill from command palette / dashboard handoff
useEffect(() => {
const prefill = (location.state as { prefill?: string } | null)?.prefill
const state = location.state as { prefill?: string; uploadIds?: string[] } | null
const prefill = state?.prefill
const uploadIds = state?.uploadIds
if (!prefill || prefillHandledRef.current) return
prefillHandledRef.current = true
@@ -80,7 +82,10 @@ export default function AssistantChatPage() {
setMessages([{ role: 'user', content: prefill }])
setLoading(true)
const response = await aiSessionsApi.sendChatMessage(session.session_id, { message: prefill })
const response = await aiSessionsApi.sendChatMessage(session.session_id, {
message: prefill,
upload_ids: uploadIds?.length ? uploadIds : undefined,
})
setMessages(prev => [
...prev,
{ role: 'assistant', content: response.content, suggestedFlows: response.suggested_flows },
@@ -183,12 +188,19 @@ export default function AssistantChatPage() {
if (!input.trim() || !activeChatId || loading) return
const userMessage = input.trim()
const completedUploadIds = pendingUploads
.filter((u) => u.status === 'done' && u.result?.id)
.map((u) => u.result!.id)
setInput('')
setPendingUploads([])
setMessages(prev => [...prev, { role: 'user', content: userMessage }])
setLoading(true)
try {
const response = await aiSessionsApi.sendChatMessage(activeChatId, { message: userMessage })
const response = await aiSessionsApi.sendChatMessage(activeChatId, {
message: userMessage,
upload_ids: completedUploadIds.length > 0 ? completedUploadIds : undefined,
})
analytics.aiFeatureUsed({ feature: 'assistant_chat' })
setMessages(prev => [
...prev,

View File

@@ -216,6 +216,7 @@ export interface ChatSessionCreateResponse {
/** Payload sent to the assistant chat when posting a message. */
export interface ChatMessageRequest {
  /** Text of the user's message. */
  message: string
  /** Optional IDs of uploads to attach to the message. */
  upload_ids?: string[]
}
export interface ChatMessageResponse {