From 8e7f13d2f89e9c852d485562b9347e10ee55c361 Mon Sep 17 00:00:00 2001 From: chihlasm Date: Tue, 24 Mar 2026 05:28:06 +0000 Subject: [PATCH] refactor: remove dead assistant_chat system, consolidate image helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old /assistant/chats/* CRUD endpoints and assistant_chat_service chat functions were unused — the frontend exclusively uses /ai-sessions/{id}/chat (unified_chat_service) for all chat operations. Removed: - Chat CRUD endpoints (create, list, get, send, update, delete, bulk delete, conclude) - assistant_chat_service: create_chat, send_message, generate_conclusion_summary, CONCLUSION_SYSTEM_PROMPT - Frontend: assistantChatApi chat methods, dead types (AssistantChat, AssistantChatMessage, ConcludeChatRequest, etc.) Kept: - /assistant/retention endpoints (used by ChatRetentionSettingsPage) - Shared AI infrastructure (_call_ai, _call_anthropic_cached, ASSISTANT_SYSTEM_PROMPT, _auto_title) — imported by unified_chat_service Moved: - fetch_upload_images + resize_image_for_vision → storage_service.py (shared location, not tied to dead endpoint) Also added "Image Analysis" section to system prompt so Claude knows to describe attached screenshots. Net 650 lines removed (141 insertions, 791 deletions), almost all of it dead code. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/app/api/endpoints/ai_sessions.py | 4 +- backend/app/api/endpoints/assistant_chat.py | 444 +----------------- backend/app/schemas/assistant_chat.py | 66 +-- .../app/services/assistant_chat_service.py | 213 +-------- backend/app/services/storage_service.py | 107 +++++ frontend/src/api/assistantChat.ts | 61 +-- frontend/src/types/assistant-chat.ts | 35 -- frontend/src/types/index.ts | 2 +- 8 files changed, 141 insertions(+), 791 deletions(-) diff --git a/backend/app/api/endpoints/ai_sessions.py b/backend/app/api/endpoints/ai_sessions.py index 0be32bbc..c6379220 100644 --- a/backend/app/api/endpoints/ai_sessions.py +++ b/backend/app/api/endpoints/ai_sessions.py @@ -283,8 +283,8 @@ async def send_chat_message( # Fetch attached images from S3 (if any) images = None if data.upload_ids: - from app.api.endpoints.assistant_chat import _fetch_upload_images - images = await _fetch_upload_images(data.upload_ids, account_id, db) or None + from app.services.storage_service import fetch_upload_images + images = await fetch_upload_images(data.upload_ids, account_id, db) or None try: ai_content, suggested_flows, session = await unified_chat_service.send_chat_message( diff --git a/backend/app/api/endpoints/assistant_chat.py b/backend/app/api/endpoints/assistant_chat.py index 83188213..18ca8f65 100644 --- a/backend/app/api/endpoints/assistant_chat.py +++ b/backend/app/api/endpoints/assistant_chat.py @@ -1,453 +1,29 @@ -"""Standalone AI assistant chat endpoints. +"""Chat retention settings endpoints. 
- POST /assistant/chats — Create new chat - GET /assistant/chats — List chats (paginated, newest first) - GET /assistant/chats/{id} — Get chat with messages - POST /assistant/chats/{id}/messages — Send message - PATCH /assistant/chats/{id} — Update title, pin/unpin - DELETE /assistant/chats/{id} — Delete single chat - DELETE /assistant/chats — Bulk delete (older_than_days query param) GET /assistant/retention — Get account retention settings PATCH /assistant/retention — Update retention settings (owner only) -""" -import base64 -import logging -from datetime import datetime, timezone, timedelta -from typing import Annotated, Any, Optional -from uuid import UUID -from fastapi import APIRouter, Depends, HTTPException, Query, Request, status -from sqlalchemy import select, delete, func +Note: Chat CRUD endpoints were removed — the frontend uses /ai-sessions/{id}/chat +(unified_chat_service) for all chat operations. The /assistant prefix is kept for +the retention settings to avoid a frontend URL change. 
+""" +from typing import Annotated, Optional + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.core.rate_limit import limiter -from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin -from app.core.config import settings -from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan +from app.api.deps import get_current_active_user, get_db from app.models.user import User from app.models.account import Account -from app.models.assistant_chat import AssistantChat -from app.models.file_upload import FileUpload from app.schemas.assistant_chat import ( - ChatCreateRequest, - ChatMessageRequest, - ChatMessageResponse, - ChatListResponse, - ChatDetailResponse, - ChatUpdateRequest, RetentionSettingsResponse, RetentionSettingsUpdate, - ConcludeChatRequest, - ConcludeChatResponse, ) -from app.schemas.copilot import SuggestedFlow -from app.services import assistant_chat_service - -logger = logging.getLogger(__name__) router = APIRouter(prefix="/assistant", tags=["assistant-chat"]) -VISION_CONTENT_TYPES = {"image/png", "image/jpeg", "image/gif", "image/webp"} - -# Claude vision costs: (width × height) / 750 tokens per image. -# Claude auto-resizes images >1568px on the longest edge. -# We resize server-side to avoid sending multi-MB base64 payloads over the wire. -MAX_IMAGE_DIMENSION = 1568 # Claude's max efficient resolution -MAX_IMAGES_PER_MESSAGE = 3 # Cap to control token budget - - -def _resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]: - """Resize image to fit within Claude's efficient vision bounds. - - Returns (resized_bytes, media_type). Converts PNG screenshots to JPEG - when it reduces size significantly (screenshots are often huge PNGs). 
- """ - try: - from PIL import Image - from io import BytesIO - - img = Image.open(BytesIO(file_data)) - w, h = img.size - - # Only resize if larger than Claude's max efficient dimension - if max(w, h) > MAX_IMAGE_DIMENSION: - ratio = MAX_IMAGE_DIMENSION / max(w, h) - new_w, new_h = int(w * ratio), int(h * ratio) - img = img.resize((new_w, new_h), Image.LANCZOS) - - # Convert RGBA (common in screenshots) to RGB for JPEG - out_type = content_type - if img.mode in ("RGBA", "P") and content_type == "image/png": - img = img.convert("RGB") - out_type = "image/jpeg" - - buf = BytesIO() - if out_type == "image/jpeg": - img.save(buf, format="JPEG", quality=85, optimize=True) - else: - img.save(buf, format=img.format or "PNG", optimize=True) - - result = buf.getvalue() - - # Only use resized version if it's actually smaller - if len(result) < len(file_data): - return result, out_type - return file_data, content_type - - except ImportError: - # Pillow not installed — send original (Claude auto-resizes) - logger.debug("Pillow not available, sending original image to Claude") - return file_data, content_type - except Exception: - logger.warning("Image resize failed, sending original") - return file_data, content_type - - -async def _fetch_upload_images( - upload_ids: list[UUID], - account_id: UUID, - db: AsyncSession, -) -> list[dict[str, Any]]: - """Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision. - - Resizes images server-side to reduce network payload and applies a per-message - cap to control token budget (~1,600 tokens per full-res image). 
- """ - if not upload_ids or not settings.STORAGE_ENDPOINT: - return [] - - from app.services import storage_service - - # Cap the number of images to limit token cost - capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE] - if len(upload_ids) > MAX_IMAGES_PER_MESSAGE: - logger.info( - "Capped images from %d to %d for token budget", - len(upload_ids), MAX_IMAGES_PER_MESSAGE, - ) - - result = await db.execute( - select(FileUpload).where( - FileUpload.id.in_(capped_ids), - FileUpload.account_id == account_id, - FileUpload.content_type.in_(VISION_CONTENT_TYPES), - ) - ) - uploads = result.scalars().all() - - images: list[dict[str, Any]] = [] - for upload in uploads: - try: - file_data = storage_service.download_file(upload.storage_key) - resized_data, media_type = _resize_image_for_vision( - file_data, upload.content_type - ) - images.append({ - "media_type": media_type, - "data": base64.b64encode(resized_data).decode("ascii"), - }) - except Exception: - logger.warning("Failed to fetch upload %s from S3", upload.id) - return images - - -def _require_ai_enabled() -> None: - if not settings.ai_enabled: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="AI is not configured. 
Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.", - ) - - -@router.post("/chats", response_model=ChatDetailResponse, status_code=201) -@limiter.limit("10/minute") -async def create_chat( - request: Request, - data: ChatCreateRequest, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], - _: None = Depends(require_engineer_or_admin), -): - """Create a new empty chat conversation.""" - chat = await assistant_chat_service.create_chat( - user_id=current_user.id, - account_id=current_user.account_id, - db=db, - ) - await db.commit() - return ChatDetailResponse.model_validate(chat) - - -@router.get("/chats", response_model=list[ChatListResponse]) -async def list_chats( - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], - page: int = Query(1, ge=1), - size: int = Query(20, ge=1, le=100), -): - """List user's chat conversations (newest first, pinned on top).""" - offset = (page - 1) * size - result = await db.execute( - select(AssistantChat) - .where(AssistantChat.user_id == current_user.id) - .order_by(AssistantChat.pinned.desc(), AssistantChat.updated_at.desc()) - .offset(offset) - .limit(size) - ) - chats = result.scalars().all() - return [ChatListResponse.model_validate(c) for c in chats] - - -@router.get("/chats/{chat_id}", response_model=ChatDetailResponse) -async def get_chat( - chat_id: UUID, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], -): - """Get a chat with full message history.""" - result = await db.execute( - select(AssistantChat).where( - AssistantChat.id == chat_id, - AssistantChat.user_id == current_user.id, - ) - ) - chat = result.scalar_one_or_none() - if not chat: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found") - return ChatDetailResponse.model_validate(chat) - - -@router.post("/chats/{chat_id}/messages", 
response_model=ChatMessageResponse) -@limiter.limit("10/minute") -async def post_message( - request: Request, - chat_id: UUID, - data: ChatMessageRequest, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], - _: None = Depends(require_engineer_or_admin), -): - """Send a message and get AI response.""" - _require_ai_enabled() - - allowed, quota_status = await check_ai_quota( - user_id=current_user.id, - account_id=current_user.account_id, - db=db, - billing_anchor=current_user.ai_billing_cycle_anchor_at, - is_super_admin=current_user.is_super_admin, - ) - if not allowed: - reset_key = "daily_reset_at" if quota_status.get("deny_reason") == "daily" else "monthly_reset_at" - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail={ - "message": f"AI limit exceeded ({quota_status['deny_reason']})", - "reset_at": quota_status.get(reset_key), - "quota": quota_status, - }, - ) - - plan = await get_user_plan(current_user.account_id, db) - - # Capture scalar fields before the try block — after db.rollback() - # the ORM objects are expired and accessing attributes triggers a - # lazy load, which crashes in async context (MissingGreenlet). 
- user_id = current_user.id - account_id = current_user.account_id - - # Fetch attached images from S3 (if any) - images = await _fetch_upload_images(data.upload_ids, account_id, db) - - try: - ai_content, suggested_flows, chat = await assistant_chat_service.send_message( - chat_id=chat_id, - user_id=user_id, - account_id=account_id, - message=data.message, - db=db, - images=images or None, - ) - except ValueError as e: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) - except Exception as e: - logger.exception("Assistant chat message failed: %s", e) - await db.rollback() - await record_ai_usage( - user_id=user_id, - account_id=account_id, - conversation_id=None, - generation_type="assistant_message", - tier=plan, - input_tokens=0, - output_tokens=0, - estimated_cost=0, - succeeded=False, - counts_toward_quota=False, - error_code=type(e).__name__, - extra_data={"assistant_chat_id": str(chat_id)}, - db=db, - ) - await db.commit() - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - detail=f"AI provider error ({type(e).__name__}). 
Please try again.", - ) - - await record_ai_usage( - user_id=user_id, - account_id=account_id, - conversation_id=None, - generation_type="assistant_message", - tier=plan, - input_tokens=chat.total_input_tokens, - output_tokens=chat.total_output_tokens, - estimated_cost=( - chat.total_input_tokens * 1.0 / 1_000_000 - + chat.total_output_tokens * 5.0 / 1_000_000 - ), - succeeded=True, - counts_toward_quota=False, - error_code=None, - extra_data={"assistant_chat_id": str(chat_id)}, - db=db, - ) - await db.commit() - - return ChatMessageResponse( - content=ai_content, - suggested_flows=[SuggestedFlow.model_validate(sf) for sf in suggested_flows], - ) - - -@router.post("/chats/{chat_id}/conclude", response_model=ConcludeChatResponse) -@limiter.limit("10/minute") -async def conclude_chat( - request: Request, - chat_id: UUID, - data: ConcludeChatRequest, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], - _: None = Depends(require_engineer_or_admin), -): - """Conclude a chat session and generate ticket-ready summary.""" - _require_ai_enabled() - - result = await db.execute( - select(AssistantChat).where( - AssistantChat.id == chat_id, - AssistantChat.user_id == current_user.id, - ) - ) - chat = result.scalar_one_or_none() - if not chat: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found") - - if chat.concluded_at: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Chat already concluded", - ) - - if chat.message_count < 2: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Chat must have at least one exchange before concluding", - ) - - try: - summary = await assistant_chat_service.generate_conclusion_summary( - chat=chat, - outcome=data.outcome, - notes=data.notes, - ) - except Exception as e: - logger.exception("Failed to generate conclusion summary: %s", e) - raise HTTPException( - status_code=status.HTTP_502_BAD_GATEWAY, - 
detail="Failed to generate summary. Please try again.", - ) - - now = datetime.now(timezone.utc) - chat.conclusion_outcome = data.outcome - chat.conclusion_summary = summary - chat.concluded_at = now - await db.commit() - - return ConcludeChatResponse( - summary=summary, - outcome=data.outcome, - concluded_at=now, - ) - - -@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse) -async def update_chat( - chat_id: UUID, - data: ChatUpdateRequest, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], -): - """Update chat title or pin/unpin.""" - result = await db.execute( - select(AssistantChat).where( - AssistantChat.id == chat_id, - AssistantChat.user_id == current_user.id, - ) - ) - chat = result.scalar_one_or_none() - if not chat: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found") - - if data.title is not None: - chat.title = data.title - if data.pinned is not None: - chat.pinned = data.pinned - - await db.commit() - return ChatDetailResponse.model_validate(chat) - - -@router.delete("/chats/{chat_id}", status_code=204) -async def delete_chat( - chat_id: UUID, - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], -): - """Delete a single chat.""" - result = await db.execute( - select(AssistantChat).where( - AssistantChat.id == chat_id, - AssistantChat.user_id == current_user.id, - ) - ) - chat = result.scalar_one_or_none() - if not chat: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found") - - await db.delete(chat) - await db.commit() - - -@router.delete("/chats", status_code=204) -async def bulk_delete_chats( - current_user: Annotated[User, Depends(get_current_active_user)], - db: Annotated[AsyncSession, Depends(get_db)], - older_than_days: int = Query(..., ge=1), -): - """Bulk delete chats older than N days (skips pinned).""" - cutoff = datetime.now(timezone.utc) 
- timedelta(days=older_than_days) - await db.execute( - delete(AssistantChat).where( - AssistantChat.user_id == current_user.id, - AssistantChat.pinned == False, # noqa: E712 - AssistantChat.updated_at < cutoff, - ) - ) - await db.commit() - - @router.get("/retention", response_model=RetentionSettingsResponse) async def get_retention_settings( current_user: Annotated[User, Depends(get_current_active_user)], diff --git a/backend/app/schemas/assistant_chat.py b/backend/app/schemas/assistant_chat.py index 6eeecbc1..2e497a74 100644 --- a/backend/app/schemas/assistant_chat.py +++ b/backend/app/schemas/assistant_chat.py @@ -1,54 +1,11 @@ -"""Pydantic schemas for standalone AI assistant chat.""" -from typing import Optional, Any, Literal -from uuid import UUID -from datetime import datetime +"""Pydantic schemas for chat retention settings. + +Chat CRUD schemas were removed — the active chat system uses +schemas from ai_session.py via the /ai-sessions endpoints. +""" +from typing import Optional from pydantic import BaseModel, Field -from app.schemas.copilot import SuggestedFlow - - -class ChatCreateRequest(BaseModel): - """Empty body — creates a new blank conversation.""" - pass - - -class ChatMessageRequest(BaseModel): - message: str = Field(..., min_length=1, max_length=8000) - upload_ids: list[UUID] = Field(default_factory=list, max_length=10) - - -class ChatMessageResponse(BaseModel): - content: str - suggested_flows: list[SuggestedFlow] = [] - - -class ChatListResponse(BaseModel): - id: UUID - title: str - message_count: int - pinned: bool - created_at: datetime - updated_at: datetime - - model_config = {"from_attributes": True} - - -class ChatDetailResponse(BaseModel): - id: UUID - title: str - messages: list[dict[str, Any]] - message_count: int - pinned: bool - created_at: datetime - updated_at: datetime - - model_config = {"from_attributes": True} - - -class ChatUpdateRequest(BaseModel): - title: Optional[str] = Field(None, min_length=1, max_length=255) - pinned: 
Optional[bool] = None - class RetentionSettingsResponse(BaseModel): chat_retention_days: Optional[int] @@ -58,14 +15,3 @@ class RetentionSettingsResponse(BaseModel): class RetentionSettingsUpdate(BaseModel): chat_retention_days: Optional[int] = Field(None, ge=1, le=365) chat_retention_max_count: Optional[int] = Field(None, ge=10, le=10000) - - -class ConcludeChatRequest(BaseModel): - outcome: Literal["resolved", "escalated", "paused"] - notes: Optional[str] = Field(None, max_length=2000) - - -class ConcludeChatResponse(BaseModel): - summary: str - outcome: str - concluded_at: datetime diff --git a/backend/app/services/assistant_chat_service.py b/backend/app/services/assistant_chat_service.py index e5287b7d..87a92e8b 100644 --- a/backend/app/services/assistant_chat_service.py +++ b/backend/app/services/assistant_chat_service.py @@ -1,7 +1,7 @@ -"""Standalone AI assistant chat service with RAG context. +"""Shared AI chat infrastructure — system prompt, prompt caching, and AI calling. -Provides persistent conversation history for general IT questions -with semantic search over the team's flow library. +Used by unified_chat_service (the active chat backend). The assistant_chat +CRUD endpoints were removed — only retention settings remain on that router. Uses Anthropic prompt caching to reduce cost on multi-turn conversations: - The static system prompt is cached (ephemeral, 5-min TTL) @@ -13,14 +13,8 @@ for real-time documentation lookups (controlled by ENABLE_MCP_MICROSOFT_LEARN). """ import logging from typing import Any -from uuid import UUID - -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings -from app.models.assistant_chat import AssistantChat -from app.services.rag_service import search as rag_search, build_rag_context, extract_suggested_flows logger = logging.getLogger(__name__) @@ -74,6 +68,11 @@ You have access to Microsoft's official documentation via Microsoft Learn. 
Use i - No team flow covers the topic and vendor-specific detail would help Do NOT use Microsoft Learn for every question — only when official docs add real value. +## Image Analysis +When an image is attached, analyze it carefully. Screenshots of error messages, \ +config panels, event viewer logs, and network diagrams are common in MSP work. \ +Describe what you see and use the visual information to inform your troubleshooting advice. + ## Boundaries - Stay focused on IT infrastructure, systems administration, and MSP operations. - If a question is clearly outside your domain, say so briefly and redirect. @@ -273,199 +272,3 @@ def _auto_title(message: str) -> str: if len(message) > 100: title = title.rsplit(" ", 1)[0] + "..." return title - - -CONCLUSION_SYSTEM_PROMPT = """\ -You are a ticket documentation specialist for MSP (Managed Service Provider) teams. \ -Your job is to transform an AI troubleshooting conversation into clean, professional \ -ticket notes that can be pasted directly into a PSA/ticketing system (ConnectWise, \ -Autotask, HaloPSA, etc.). - -## Output Format - -Generate a structured summary using this exact format: - -**Subject:** [One-line summary of the issue] - -**Outcome:** {outcome_label} - -**Problem Description:** -[2-3 sentence summary of the original problem] - -**Steps Taken:** -1. [Step] — [Result/finding] -2. 
[Step] — [Result/finding] -(list all troubleshooting steps from the conversation) - -**Current Status:** -[Where things stand now — what was resolved, what remains] - -{notes_section} - -**Key Findings:** -- [Important discovery or configuration detail] -- [Any relevant error codes, settings, or values identified] - -{resume_section} - -## Rules -- Be concise but thorough — these notes will be read by another engineer -- Include specific technical details (commands run, error messages, config values) -- Use plain text formatting (no HTML) — bold with ** is fine -- Do NOT include conversational filler, greetings, or meta-commentary -- Extract ALL actionable steps from the conversation, in chronological order -- If the conversation identified root cause, state it clearly -""" - - -async def generate_conclusion_summary( - chat: "AssistantChat", - outcome: str, - notes: str | None = None, -) -> str: - """Generate a ticket-ready summary from a concluded chat conversation.""" - outcome_labels = { - "resolved": "Resolved", - "escalated": "Escalated", - "paused": "Paused — To Be Continued", - } - outcome_label = outcome_labels.get(outcome, outcome) - - notes_section = "" - if notes: - notes_section = f"\n**Engineer Notes:**\n{notes}\n" - - resume_section = "" - if outcome == "paused": - resume_section = ( - "\n**Next Steps (for resumption):**\n" - "- [What needs to happen next]\n" - "- [Any pending actions or follow-ups]\n" - ) - elif outcome == "escalated": - resume_section = ( - "\n**Escalation Details:**\n" - "- [Reason for escalation]\n" - "- [Recommended next steps for receiving team/tier]\n" - ) - - # Build the conversation transcript for the AI - transcript_lines = [] - for msg in chat.messages: - role_label = "ENGINEER" if msg["role"] == "user" else "AI ASSISTANT" - transcript_lines.append(f"[{role_label}]: {msg['content']}") - - transcript = "\n\n".join(transcript_lines) - - prompt = ( - f"Outcome: {outcome_label}\n\n" - f"{'Engineer Notes: ' + notes if notes else 
'(No additional notes)'}\n\n" - f"--- CONVERSATION TRANSCRIPT ---\n\n{transcript}\n\n" - f"--- END TRANSCRIPT ---\n\n" - f"Generate the ticket notes now. Replace all placeholder brackets with actual content from the conversation. " - f"The notes_section placeholder should be: {notes_section or '(omit this section)'}\n" - f"The resume_section placeholder should be filled based on the conversation context." - ) - - system_with_vars = CONCLUSION_SYSTEM_PROMPT.replace( - "{outcome_label}", outcome_label - ).replace( - "{notes_section}", notes_section or "" - ).replace( - "{resume_section}", resume_section - ) - - content, _, _ = await _call_ai( - system_base=system_with_vars, - rag_context="", - history=[], - new_message=prompt, - max_tokens=2048, - ) - - return content - - -async def create_chat( - user_id: UUID, - account_id: UUID, - db: AsyncSession, -) -> AssistantChat: - """Create a new empty chat.""" - chat = AssistantChat( - user_id=user_id, - account_id=account_id, - messages=[], - ) - db.add(chat) - await db.flush() - return chat - - -async def send_message( - chat_id: UUID, - user_id: UUID, - account_id: UUID, - message: str, - db: AsyncSession, - images: list[dict[str, Any]] | None = None, -) -> tuple[str, list[dict[str, Any]], AssistantChat]: - """Send a user message and get AI response. - - Args: - images: Optional list of {"media_type": str, "data": str (base64)} - for vision content attached to this message. - - Returns (ai_content, suggested_flows, chat). 
- """ - result = await db.execute( - select(AssistantChat).where( - AssistantChat.id == chat_id, - AssistantChat.user_id == user_id, - ) - ) - chat = result.scalar_one_or_none() - if not chat: - raise ValueError("Chat not found") - - # Auto-title from first message - if chat.message_count == 0: - chat.title = _auto_title(message) - - # RAG search - rag_results = await rag_search( - query=message, - account_id=account_id, - db=db, - limit=8, - ) - - rag_context = build_rag_context(rag_results) - - # Build messages for AI - ai_messages: list[dict[str, Any]] = [] - for msg in chat.messages: - if msg["role"] in ("user", "assistant"): - ai_messages.append({"role": msg["role"], "content": msg["content"]}) - - # Call AI with prompt caching (Anthropic) or generic provider - ai_content, input_tokens, output_tokens = await _call_ai( - system_base=ASSISTANT_SYSTEM_PROMPT, - rag_context=rag_context, - history=ai_messages, - new_message=message, - images=images, - ) - - # Update chat - msgs = list(chat.messages) - msgs.append({"role": "user", "content": message}) - msgs.append({"role": "assistant", "content": ai_content}) - chat.messages = msgs - chat.message_count += 2 - chat.total_input_tokens += input_tokens - chat.total_output_tokens += output_tokens - - suggested_flows = extract_suggested_flows(rag_results) - - return ai_content, suggested_flows, chat diff --git a/backend/app/services/storage_service.py b/backend/app/services/storage_service.py index 5ff4d67f..b8e10292 100644 --- a/backend/app/services/storage_service.py +++ b/backend/app/services/storage_service.py @@ -1,7 +1,10 @@ """S3-compatible object storage service for file uploads.""" +import base64 import logging import uuid from io import BytesIO +from typing import Any +from uuid import UUID import boto3 from botocore.config import Config as BotoConfig @@ -92,3 +95,107 @@ async def delete_file(storage_key: str) -> None: client.delete_object(Bucket=settings.STORAGE_BUCKET_NAME, Key=storage_key) except 
ClientError: logger.warning(f"Failed to delete S3 object: {storage_key}") + + +# ── Vision helpers (resize + fetch for AI) ───────────────────── + +# Claude vision costs: (width × height) / 750 tokens per image. +# Claude auto-resizes images >1568px on the longest edge. +# We resize server-side to avoid sending multi-MB base64 payloads over the wire. +MAX_IMAGE_DIMENSION = 1568 # Claude's max efficient resolution +MAX_IMAGES_PER_MESSAGE = 3 # Cap to control token budget + + +def resize_image_for_vision(file_data: bytes, content_type: str) -> tuple[bytes, str]: + """Resize image to fit within Claude's efficient vision bounds. + + Returns (resized_bytes, media_type). Converts PNG screenshots to JPEG + when it reduces size significantly (screenshots are often huge PNGs). + """ + try: + from PIL import Image + + img = Image.open(BytesIO(file_data)) + w, h = img.size + + # Only resize if larger than Claude's max efficient dimension + if max(w, h) > MAX_IMAGE_DIMENSION: + ratio = MAX_IMAGE_DIMENSION / max(w, h) + new_w, new_h = int(w * ratio), int(h * ratio) + img = img.resize((new_w, new_h), Image.LANCZOS) + + # Convert RGBA (common in screenshots) to RGB for JPEG + out_type = content_type + if img.mode in ("RGBA", "P") and content_type == "image/png": + img = img.convert("RGB") + out_type = "image/jpeg" + + buf = BytesIO() + if out_type == "image/jpeg": + img.save(buf, format="JPEG", quality=85, optimize=True) + else: + img.save(buf, format=img.format or "PNG", optimize=True) + + result = buf.getvalue() + + # Only use resized version if it's actually smaller + if len(result) < len(file_data): + return result, out_type + return file_data, content_type + + except ImportError: + # Pillow not installed — send original (Claude auto-resizes) + logger.debug("Pillow not available, sending original image to Claude") + return file_data, content_type + except Exception: + logger.warning("Image resize failed, sending original") + return file_data, content_type + + +async def 
fetch_upload_images( + upload_ids: list[UUID], + account_id: UUID, + db: Any, +) -> list[dict[str, Any]]: + """Fetch uploaded images from S3 and return as base64-encoded dicts for Claude vision. + + Resizes images server-side to reduce network payload and applies a per-message + cap to control token budget (~1,600 tokens per full-res image). + """ + if not upload_ids or not settings.STORAGE_ENDPOINT: + return [] + + from sqlalchemy import select + from app.models.file_upload import FileUpload + + # Cap the number of images to limit token cost + capped_ids = upload_ids[:MAX_IMAGES_PER_MESSAGE] + if len(upload_ids) > MAX_IMAGES_PER_MESSAGE: + logger.info( + "Capped images from %d to %d for token budget", + len(upload_ids), MAX_IMAGES_PER_MESSAGE, + ) + + result = await db.execute( + select(FileUpload).where( + FileUpload.id.in_(capped_ids), + FileUpload.account_id == account_id, + FileUpload.content_type.in_(ALLOWED_IMAGE_TYPES), + ) + ) + uploads = result.scalars().all() + + images: list[dict[str, Any]] = [] + for upload in uploads: + try: + file_data = download_file(upload.storage_key) + resized_data, media_type = resize_image_for_vision( + file_data, upload.content_type + ) + images.append({ + "media_type": media_type, + "data": base64.b64encode(resized_data).decode("ascii"), + }) + except Exception: + logger.warning("Failed to fetch upload %s from S3", upload.id) + return images diff --git a/frontend/src/api/assistantChat.ts b/frontend/src/api/assistantChat.ts index 4c8a7d27..d4538814 100644 --- a/frontend/src/api/assistantChat.ts +++ b/frontend/src/api/assistantChat.ts @@ -1,52 +1,13 @@ import apiClient from './client' -import type { - AssistantChat, - ChatListItem, - ChatMessageResponse, - RetentionSettings, - ConcludeChatRequest, - ConcludeChatResponse, -} from '@/types/assistant-chat' +import type { RetentionSettings } from '@/types/assistant-chat' +/** + * Chat retention settings API. 
+ * + * Note: Chat CRUD methods were removed — the frontend uses aiSessionsApi + * for all chat operations. Only retention settings remain on the /assistant prefix. + */ export const assistantChatApi = { - async createChat(): Promise { - const response = await apiClient.post('/assistant/chats', {}) - return response.data - }, - - async listChats(page = 1, size = 20): Promise { - const response = await apiClient.get('/assistant/chats', { - params: { page, size }, - }) - return response.data - }, - - async getChat(chatId: string): Promise { - const response = await apiClient.get(`/assistant/chats/${chatId}`) - return response.data - }, - - async sendMessage(chatId: string, message: string): Promise { - const response = await apiClient.post( - `/assistant/chats/${chatId}/messages`, - { message } - ) - return response.data - }, - - async updateChat(chatId: string, data: { title?: string; pinned?: boolean }): Promise { - const response = await apiClient.patch(`/assistant/chats/${chatId}`, data) - return response.data - }, - - async deleteChat(chatId: string): Promise { - await apiClient.delete(`/assistant/chats/${chatId}`) - }, - - async bulkDeleteChats(olderThanDays: number): Promise { - await apiClient.delete('/assistant/chats', { params: { older_than_days: olderThanDays } }) - }, - async getRetentionSettings(): Promise { const response = await apiClient.get('/assistant/retention') return response.data @@ -56,14 +17,6 @@ export const assistantChatApi = { const response = await apiClient.patch('/assistant/retention', data) return response.data }, - - async concludeChat(chatId: string, data: ConcludeChatRequest): Promise { - const response = await apiClient.post( - `/assistant/chats/${chatId}/conclude`, - data - ) - return response.data - }, } export default assistantChatApi diff --git a/frontend/src/types/assistant-chat.ts b/frontend/src/types/assistant-chat.ts index 7e04ad28..8b0d25f4 100644 --- a/frontend/src/types/assistant-chat.ts +++ 
b/frontend/src/types/assistant-chat.ts @@ -1,20 +1,3 @@ -import type { SuggestedFlow } from './copilot' - -export interface AssistantChat { - id: string - title: string - messages: AssistantChatMessage[] - message_count: number - pinned: boolean - created_at: string - updated_at: string -} - -export interface AssistantChatMessage { - role: 'user' | 'assistant' - content: string -} - export interface ChatListItem { id: string title: string @@ -24,27 +7,9 @@ export interface ChatListItem { updated_at: string } -export interface ChatMessageResponse { - content: string - suggested_flows: SuggestedFlow[] -} - export interface RetentionSettings { chat_retention_days: number | null chat_retention_max_count: number | null } export type ConclusionOutcome = 'resolved' | 'escalated' | 'paused' - -export interface ConcludeChatRequest { - outcome: ConclusionOutcome - notes?: string -} - -export interface ConcludeChatResponse { - summary: string - outcome: ConclusionOutcome - concluded_at: string -} - -export type { SuggestedFlow } diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index f1967d46..7fffdeb6 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -11,7 +11,7 @@ export type { Account, Subscription, PlanLimits, SubscriptionDetails, AccountInv export * from './admin' export * from './analytics' export * from './copilot' -export type { AssistantChat, AssistantChatMessage, ChatListItem, ChatMessageResponse, RetentionSettings } from './assistant-chat' +export type { ChatListItem, RetentionSettings, ConclusionOutcome } from './assistant-chat' export * from './ai-session' export * from './flow-proposal' export * from './flowpilot-analytics'