diff --git a/backend/app/api/endpoints/assistant_chat.py b/backend/app/api/endpoints/assistant_chat.py
index 3bcc1f5e..0c13d3e2 100644
--- a/backend/app/api/endpoints/assistant_chat.py
+++ b/backend/app/api/endpoints/assistant_chat.py
@@ -10,9 +10,11 @@ GET /assistant/retention — Get account retention settings
 PATCH /assistant/retention — Update retention settings (owner only)
 """
 
+import asyncio
+import base64
 import logging
 from datetime import datetime, timezone, timedelta
-from typing import Annotated, Optional
+from typing import Annotated, Any, Optional
 from uuid import UUID
 
 from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
@@ -26,6 +28,7 @@ from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_
 from app.models.user import User
 from app.models.account import Account
 from app.models.assistant_chat import AssistantChat
+from app.models.file_upload import FileUpload
 from app.schemas.assistant_chat import (
     ChatCreateRequest,
     ChatMessageRequest,
@@ -46,6 +49,65 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
 
 
+VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
+
+
+async def _fetch_upload_images(
+    upload_ids: list[UUID],
+    account_id: UUID,
+    db: AsyncSession,
+) -> list[dict[str, Any]]:
+    """Fetch uploaded images from S3 as base64-encoded dicts for Claude vision.
+
+    Only uploads owned by ``account_id`` whose content type is listed in
+    ``VISION_CONTENT_TYPES`` are returned; any other ID is ignored. Images
+    come back in the order of ``upload_ids``. A download that fails is logged
+    and skipped so one bad attachment does not fail the whole message.
+
+    Returns:
+        ``{"media_type": str, "data": str}`` dicts, ``data`` being the
+        base64-encoded file content. Empty when no IDs were given or
+        ``settings.STORAGE_ENDPOINT`` is unset.
+    """
+    if not upload_ids or not settings.STORAGE_ENDPOINT:
+        return []
+
+    from app.services import storage_service
+
+    result = await db.execute(
+        select(FileUpload).where(
+            FileUpload.id.in_(upload_ids),
+            FileUpload.account_id == account_id,
+            FileUpload.content_type.in_(VISION_CONTENT_TYPES),
+        )
+    )
+    # The query has no ORDER BY, so restore the order the client sent.
+    position = {str(uid): idx for idx, uid in enumerate(upload_ids)}
+    uploads = sorted(
+        result.scalars().all(),
+        key=lambda upload: position.get(str(upload.id), len(position)),
+    )
+
+    images: list[dict[str, Any]] = []
+    for upload in uploads:
+        try:
+            # download_file blocks on S3 I/O; run it off the event loop.
+            file_data = await asyncio.to_thread(
+                storage_service.download_file, upload.storage_key
+            )
+        except Exception:
+            # Best effort: keep the traceback, skip this attachment.
+            logger.warning(
+                "Failed to fetch upload %s from S3", upload.id, exc_info=True
+            )
+            continue
+        images.append({
+            "media_type": upload.content_type,
+            "data": base64.b64encode(file_data).decode("ascii"),
+        })
+    return images
+
+
 def _require_ai_enabled() -> None:
     if not settings.ai_enabled:
         raise HTTPException(
@@ -151,6 +213,9 @@ async def post_message(
     user_id = current_user.id
     account_id = current_user.account_id
 
+    # Fetch attached images from S3 (if any)
+    images = await _fetch_upload_images(data.upload_ids, account_id, db)
+
     try:
         ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
             chat_id=chat_id,
@@ -158,6 +223,7 @@ async def post_message(
             account_id=account_id,
             message=data.message,
             db=db,
+            images=images or None,
         )
     except ValueError as e:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
diff --git a/backend/app/schemas/assistant_chat.py b/backend/app/schemas/assistant_chat.py
index 5d7fc6af..6eeecbc1 100644
--- a/backend/app/schemas/assistant_chat.py
+++ b/backend/app/schemas/assistant_chat.py
@@ -14,6 +14,7 @@ class ChatCreateRequest(BaseModel):
 
 class ChatMessageRequest(BaseModel):
     message: str = Field(..., min_length=1, max_length=8000)
+    upload_ids: list[UUID] = Field(default_factory=list, max_length=10)
 
 
 class ChatMessageResponse(BaseModel):
diff --git a/backend/app/services/assistant_chat_service.py b/backend/app/services/assistant_chat_service.py
index 9508fe56..e5287b7d 100644
--- a/backend/app/services/assistant_chat_service.py
+++ b/backend/app/services/assistant_chat_service.py
@@ -87,6 +87,7 @@ async def _call_ai(
     history: list[dict[str, Any]],
     new_message: str,
     max_tokens: int = 4096,
+    images: list[dict[str, Any]] | None = None,
 ) -> tuple[str, int, int]:
     """Call the AI with prompt caching when using Anthropic.
 
@@ -95,13 +96,18 @@ async def _call_ai(
     - RAG context: NOT cached (changes per query)
     - Conversation history prefix: cached via breakpoint on last existing
       message (stable — only new user message is uncached)
+
+    Args:
+        images: Optional list of {"media_type": str, "data": str (base64)}
+            to include alongside the new_message as vision content.
     """
     if settings.AI_PROVIDER == "anthropic" and settings.ANTHROPIC_API_KEY:
         return await _call_anthropic_cached(
-            system_base, rag_context, history, new_message, max_tokens
+            system_base, rag_context, history, new_message, max_tokens,
+            images=images,
         )
 
-    # Fallback: generic provider (Gemini, etc.)
+    # Fallback: generic provider (Gemini, etc.) — images are not supported on this path
     from app.core.ai_provider import get_ai_provider
 
     system_prompt = system_base + rag_context
@@ -120,6 +126,7 @@ async def _call_anthropic_cached(
     history: list[dict[str, Any]],
     new_message: str,
     max_tokens: int,
+    images: list[dict[str, Any]] | None = None,
 ) -> tuple[str, int, int]:
     """Call Anthropic with prompt caching on system prompt and history.
 
@@ -168,7 +175,24 @@ async def _call_anthropic_cached(
         }
 
     # Add the new user message (uncached — it's new each turn)
-    messages.append({"role": "user", "content": new_message})
+    # If images are attached, build multimodal content blocks: one image
+    # block per attachment, followed by the text of the message.
+    if images:
+        content_blocks: list[dict[str, Any]] = [
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": img["media_type"],
+                    "data": img["data"],
+                },
+            }
+            for img in images
+        ]
+        content_blocks.append({"type": "text", "text": new_message})
+        messages.append({"role": "user", "content": content_blocks})
+    else:
+        messages.append({"role": "user", "content": new_message})
 
     # MCP server config (optional — controlled by settings)
     mcp_servers = anthropic.NOT_GIVEN
@@ -386,9 +410,14 @@ async def send_message(
     account_id: UUID,
     message: str,
     db: AsyncSession,
+    images: list[dict[str, Any]] | None = None,
 ) -> tuple[str, list[dict[str, Any]], AssistantChat]:
     """Send a user message and get AI response.
 
+    Args:
+        images: Optional list of {"media_type": str, "data": str (base64)}
+            for vision content attached to this message.
+
     Returns (ai_content, suggested_flows, chat).
     """
     result = await db.execute(
@@ -427,6 +456,7 @@ async def send_message(
         rag_context=rag_context,
         history=ai_messages,
         new_message=message,
+        images=images,
     )
 
     # Update chat
diff --git a/backend/app/services/storage_service.py b/backend/app/services/storage_service.py
index d6e9399c..5ff4d67f 100644
--- a/backend/app/services/storage_service.py
+++ b/backend/app/services/storage_service.py
@@ -67,6 +67,18 @@ async def upload_file(
     return storage_key
 
 
+def download_file(storage_key: str) -> bytes:
+    """Download a file from S3 and return its contents as bytes.
+
+    This is a synchronous call that buffers the whole object in memory;
+    async callers should run it in a worker thread.
+    """
+    client = _get_client()
+    buf = BytesIO()
+    client.download_fileobj(settings.STORAGE_BUCKET_NAME, storage_key, buf)
+    return buf.getvalue()
+
+
 def get_presigned_url(storage_key: str) -> str:
     """Generate a time-limited presigned URL for downloading a file."""
     client = _get_client()
diff --git a/frontend/src/pages/AssistantChatPage.tsx b/frontend/src/pages/AssistantChatPage.tsx
index cb0a7723..2cb0058c 100644
--- a/frontend/src/pages/AssistantChatPage.tsx
+++ b/frontend/src/pages/AssistantChatPage.tsx
@@ -55,7 +55,9 @@ export default function AssistantChatPage() {
 
   // Handle prefill from command palette / dashboard handoff
   useEffect(() => {
-    const prefill = (location.state as { prefill?: string } | null)?.prefill
+    const state = location.state as { prefill?: string; uploadIds?: string[] } | null
+    const prefill = state?.prefill
+    const uploadIds = state?.uploadIds
     if (!prefill || prefillHandledRef.current) return
     prefillHandledRef.current = true
 
@@ -80,7 +82,10 @@ export default function AssistantChatPage() {
         setMessages([{ role: 'user', content: prefill }])
         setLoading(true)
 
-        const response = await aiSessionsApi.sendChatMessage(session.session_id, { message: prefill })
+        const response = await aiSessionsApi.sendChatMessage(session.session_id, {
+          message: prefill,
+          upload_ids: uploadIds?.length ? uploadIds : undefined,
+        })
         setMessages(prev => [
           ...prev,
           { role: 'assistant', content: response.content, suggestedFlows: response.suggested_flows },
@@ -183,12 +188,19 @@ export default function AssistantChatPage() {
     if (!input.trim() || !activeChatId || loading) return
 
     const userMessage = input.trim()
+    const completedUploadIds = pendingUploads
+      .filter((u) => u.status === 'done' && u.result?.id)
+      .map((u) => u.result!.id)
     setInput('')
+    setPendingUploads([])
     setMessages(prev => [...prev, { role: 'user', content: userMessage }])
     setLoading(true)
 
     try {
-      const response = await aiSessionsApi.sendChatMessage(activeChatId, { message: userMessage })
+      const response = await aiSessionsApi.sendChatMessage(activeChatId, {
+        message: userMessage,
+        upload_ids: completedUploadIds.length > 0 ? completedUploadIds : undefined,
+      })
       analytics.aiFeatureUsed({ feature: 'assistant_chat' })
       setMessages(prev => [
         ...prev,
diff --git a/frontend/src/types/ai-session.ts b/frontend/src/types/ai-session.ts
index 2fe3f7d8..94bc6d08 100644
--- a/frontend/src/types/ai-session.ts
+++ b/frontend/src/types/ai-session.ts
@@ -216,6 +216,7 @@ export interface ChatSessionCreateResponse {
 
 export interface ChatMessageRequest {
   message: string
+  upload_ids?: string[]
 }
 
 export interface ChatMessageResponse {