feat: wire image uploads into AI assistant chat (vision support)
- Backend: ChatMessageRequest accepts upload_ids; the endpoint fetches the images from S3, base64-encodes them, and passes them to Claude as multimodal content blocks (vision API)
- Backend: add download_file() to storage_service for fetching from S3
- Frontend: handleSend collects completed upload IDs from pendingUploads and includes them in the sendChatMessage API call
- Frontend: prefill handler passes upload IDs from dashboard nav state
- Enables the paste-screenshot → AI-sees-it flow end-to-end

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,9 +10,10 @@
|
||||
GET /assistant/retention — Get account retention settings
|
||||
PATCH /assistant/retention — Update retention settings (owner only)
|
||||
"""
|
||||
import base64
|
||||
import logging
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Annotated, Optional
|
||||
from typing import Annotated, Any, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
||||
@@ -26,6 +27,7 @@ from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_
|
||||
from app.models.user import User
|
||||
from app.models.account import Account
|
||||
from app.models.assistant_chat import AssistantChat
|
||||
from app.models.file_upload import FileUpload
|
||||
from app.schemas.assistant_chat import (
|
||||
ChatCreateRequest,
|
||||
ChatMessageRequest,
|
||||
@@ -46,6 +48,42 @@ logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
|
||||
|
||||
|
||||
VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
|
||||
|
||||
|
||||
async def _fetch_upload_images(
    upload_ids: list[UUID],
    account_id: UUID,
    db: AsyncSession,
) -> list[dict[str, Any]]:
    """Fetch uploaded images from S3 and return them base64-encoded for Claude vision.

    Only uploads that belong to ``account_id`` and whose content type is in
    ``VISION_CONTENT_TYPES`` are selected; any other requested ID is ignored.
    Fetching is best-effort: an upload whose download fails is logged and
    skipped rather than failing the whole request.

    Args:
        upload_ids: IDs of the uploads attached to the chat message.
        account_id: Account the uploads must belong to.
        db: Session used to look up the ``FileUpload`` rows.

    Returns:
        One ``{"media_type": str, "data": str}`` dict per image that was
        downloaded, where ``data`` is the base64-encoded file content.
        Empty when ``upload_ids`` is empty or ``settings.STORAGE_ENDPOINT``
        is not set.
    """
    if not upload_ids or not settings.STORAGE_ENDPOINT:
        return []

    import asyncio

    from app.services import storage_service

    result = await db.execute(
        select(FileUpload).where(
            FileUpload.id.in_(upload_ids),
            FileUpload.account_id == account_id,
            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
        )
    )
    uploads = result.scalars().all()

    images: list[dict[str, Any]] = []
    for upload in uploads:
        try:
            # download_file is a synchronous call; run it in a worker thread
            # so the event loop is not blocked while the bytes are transferred.
            file_data = await asyncio.to_thread(
                storage_service.download_file, upload.storage_key
            )
            images.append({
                "media_type": upload.content_type,
                "data": base64.b64encode(file_data).decode("ascii"),
            })
        except Exception:
            # Best-effort: skip this image, but keep the traceback so the
            # failure can be diagnosed.
            logger.warning(
                "Failed to fetch upload %s from S3", upload.id, exc_info=True
            )
    return images
|
||||
|
||||
|
||||
def _require_ai_enabled() -> None:
|
||||
if not settings.ai_enabled:
|
||||
raise HTTPException(
|
||||
@@ -151,6 +189,9 @@ async def post_message(
|
||||
user_id = current_user.id
|
||||
account_id = current_user.account_id
|
||||
|
||||
# Fetch attached images from S3 (if any)
|
||||
images = await _fetch_upload_images(data.upload_ids, account_id, db)
|
||||
|
||||
try:
|
||||
ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
|
||||
chat_id=chat_id,
|
||||
@@ -158,6 +199,7 @@ async def post_message(
|
||||
account_id=account_id,
|
||||
message=data.message,
|
||||
db=db,
|
||||
images=images or None,
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
|
||||
|
||||
@@ -14,6 +14,7 @@ class ChatCreateRequest(BaseModel):
|
||||
|
||||
class ChatMessageRequest(BaseModel):
    """Request body for posting a user message to an assistant chat."""

    # The user's message text; must be 1 to 8000 characters long.
    message: str = Field(..., min_length=1, max_length=8000)
    # Optional IDs of uploads to attach to the message; defaults to an
    # empty list and is capped at 10 entries.
    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)
|
||||
|
||||
|
||||
class ChatMessageResponse(BaseModel):
|
||||
|
||||
@@ -87,6 +87,7 @@ async def _call_ai(
|
||||
history: list[dict[str, Any]],
|
||||
new_message: str,
|
||||
max_tokens: int = 4096,
|
||||
images: list[dict[str, Any]] | None = None,
|
||||
) -> tuple[str, int, int]:
|
||||
"""Call the AI with prompt caching when using Anthropic.
|
||||
|
||||
@@ -95,13 +96,18 @@ async def _call_ai(
|
||||
- RAG context: NOT cached (changes per query)
|
||||
- Conversation history prefix: cached via breakpoint on last
|
||||
existing message (stable — only new user message is uncached)
|
||||
|
||||
Args:
|
||||
images: Optional list of {"media_type": str, "data": str (base64)}
|
||||
to include alongside the new_message as vision content.
|
||||
"""
|
||||
if settings.AI_PROVIDER == "anthropic" and settings.ANTHROPIC_API_KEY:
|
||||
return await _call_anthropic_cached(
|
||||
system_base, rag_context, history, new_message, max_tokens
|
||||
system_base, rag_context, history, new_message, max_tokens,
|
||||
images=images,
|
||||
)
|
||||
|
||||
# Fallback: generic provider (Gemini, etc.)
|
||||
# Fallback: generic provider (Gemini, etc.) — images not supported
|
||||
from app.core.ai_provider import get_ai_provider
|
||||
|
||||
system_prompt = system_base + rag_context
|
||||
@@ -120,6 +126,7 @@ async def _call_anthropic_cached(
|
||||
history: list[dict[str, Any]],
|
||||
new_message: str,
|
||||
max_tokens: int,
|
||||
images: list[dict[str, Any]] | None = None,
|
||||
) -> tuple[str, int, int]:
|
||||
"""Call Anthropic with prompt caching on system prompt and history.
|
||||
|
||||
@@ -168,7 +175,22 @@ async def _call_anthropic_cached(
|
||||
}
|
||||
|
||||
# Add the new user message (uncached — it's new each turn)
|
||||
messages.append({"role": "user", "content": new_message})
|
||||
# If images are attached, build multimodal content blocks
|
||||
if images:
|
||||
content_blocks: list[dict[str, Any]] = []
|
||||
for img in images:
|
||||
content_blocks.append({
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": img["media_type"],
|
||||
"data": img["data"],
|
||||
},
|
||||
})
|
||||
content_blocks.append({"type": "text", "text": new_message})
|
||||
messages.append({"role": "user", "content": content_blocks})
|
||||
else:
|
||||
messages.append({"role": "user", "content": new_message})
|
||||
|
||||
# MCP server config (optional — controlled by settings)
|
||||
mcp_servers = anthropic.NOT_GIVEN
|
||||
@@ -386,9 +408,14 @@ async def send_message(
|
||||
account_id: UUID,
|
||||
message: str,
|
||||
db: AsyncSession,
|
||||
images: list[dict[str, Any]] | None = None,
|
||||
) -> tuple[str, list[dict[str, Any]], AssistantChat]:
|
||||
"""Send a user message and get AI response.
|
||||
|
||||
Args:
|
||||
images: Optional list of {"media_type": str, "data": str (base64)}
|
||||
for vision content attached to this message.
|
||||
|
||||
Returns (ai_content, suggested_flows, chat).
|
||||
"""
|
||||
result = await db.execute(
|
||||
@@ -427,6 +454,7 @@ async def send_message(
|
||||
rag_context=rag_context,
|
||||
history=ai_messages,
|
||||
new_message=message,
|
||||
images=images,
|
||||
)
|
||||
|
||||
# Update chat
|
||||
|
||||
@@ -67,6 +67,14 @@ async def upload_file(
|
||||
return storage_key
|
||||
|
||||
|
||||
def download_file(storage_key: str) -> bytes:
    """Return the full contents of the stored object at *storage_key*.

    The object is read from the bucket named by
    ``settings.STORAGE_BUCKET_NAME`` into an in-memory buffer, so the whole
    file is held in memory at once.
    """
    s3 = _get_client()
    sink = BytesIO()
    s3.download_fileobj(
        settings.STORAGE_BUCKET_NAME,
        storage_key,
        sink,
    )
    payload = sink.getvalue()
    return payload
|
||||
|
||||
|
||||
def get_presigned_url(storage_key: str) -> str:
|
||||
"""Generate a time-limited presigned URL for downloading a file."""
|
||||
client = _get_client()
|
||||
|
||||
@@ -55,7 +55,9 @@ export default function AssistantChatPage() {
|
||||
|
||||
// Handle prefill from command palette / dashboard handoff
|
||||
useEffect(() => {
|
||||
const prefill = (location.state as { prefill?: string } | null)?.prefill
|
||||
const state = location.state as { prefill?: string; uploadIds?: string[] } | null
|
||||
const prefill = state?.prefill
|
||||
const uploadIds = state?.uploadIds
|
||||
if (!prefill || prefillHandledRef.current) return
|
||||
prefillHandledRef.current = true
|
||||
|
||||
@@ -80,7 +82,10 @@ export default function AssistantChatPage() {
|
||||
setMessages([{ role: 'user', content: prefill }])
|
||||
setLoading(true)
|
||||
|
||||
const response = await aiSessionsApi.sendChatMessage(session.session_id, { message: prefill })
|
||||
const response = await aiSessionsApi.sendChatMessage(session.session_id, {
|
||||
message: prefill,
|
||||
upload_ids: uploadIds?.length ? uploadIds : undefined,
|
||||
})
|
||||
setMessages(prev => [
|
||||
...prev,
|
||||
{ role: 'assistant', content: response.content, suggestedFlows: response.suggested_flows },
|
||||
@@ -183,12 +188,19 @@ export default function AssistantChatPage() {
|
||||
if (!input.trim() || !activeChatId || loading) return
|
||||
|
||||
const userMessage = input.trim()
|
||||
const completedUploadIds = pendingUploads
|
||||
.filter((u) => u.status === 'done' && u.result?.id)
|
||||
.map((u) => u.result!.id)
|
||||
setInput('')
|
||||
setPendingUploads([])
|
||||
setMessages(prev => [...prev, { role: 'user', content: userMessage }])
|
||||
setLoading(true)
|
||||
|
||||
try {
|
||||
const response = await aiSessionsApi.sendChatMessage(activeChatId, { message: userMessage })
|
||||
const response = await aiSessionsApi.sendChatMessage(activeChatId, {
|
||||
message: userMessage,
|
||||
upload_ids: completedUploadIds.length > 0 ? completedUploadIds : undefined,
|
||||
})
|
||||
analytics.aiFeatureUsed({ feature: 'assistant_chat' })
|
||||
setMessages(prev => [
|
||||
...prev,
|
||||
|
||||
@@ -216,6 +216,7 @@ export interface ChatSessionCreateResponse {
|
||||
|
||||
/** Request payload for sending a message to an assistant chat. */
export interface ChatMessageRequest {
  /** The user's message text. */
  message: string
  /** Optional IDs of uploads to attach to the message. */
  upload_ids?: string[]
}
|
||||
|
||||
export interface ChatMessageResponse {
|
||||
|
||||
Reference in New Issue
Block a user