feat: add AI assistant with in-session copilot and standalone chat with RAG

Implements a three-phase AI assistant feature:

- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration, a tree chunking service, and semantic search over the team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual AI help, current-step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history, pin/delete, and configurable account-level retention policies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 01:36:36 -05:00
parent 41cb7956cb
commit 1aa60dada2
44 changed files with 3080 additions and 14 deletions
--- a/backend/app/api/endpoints/assistant_chat.py
+++ b/backend/app/api/endpoints/assistant_chat.py
@@ -0,0 +1,320 @@
+"""Standalone AI assistant chat endpoints.
+
+  POST   /assistant/chats              — Create new chat
+  GET    /assistant/chats              — List chats (paginated, newest first)
+  GET    /assistant/chats/{id}         — Get chat with messages
+  POST   /assistant/chats/{id}/messages — Send message
+  PATCH  /assistant/chats/{id}         — Update title, pin/unpin
+  DELETE /assistant/chats/{id}         — Delete single chat
+  DELETE /assistant/chats              — Bulk delete (older_than_days query param)
+  GET    /assistant/retention          — Get account retention settings
+  PATCH  /assistant/retention          — Update retention settings (owner only)
+"""
+import logging
+from datetime import datetime, timezone, timedelta
+from typing import Annotated, Optional
+from uuid import UUID
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
+from sqlalchemy import select, delete, func
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.rate_limit import limiter
+from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
+from app.core.config import settings
+from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
+from app.models.user import User
+from app.models.account import Account
+from app.models.assistant_chat import AssistantChat
+from app.schemas.assistant_chat import (
+    ChatCreateRequest,
+    ChatMessageRequest,
+    ChatMessageResponse,
+    ChatListResponse,
+    ChatDetailResponse,
+    ChatUpdateRequest,
+    RetentionSettingsResponse,
+    RetentionSettingsUpdate,
+)
+from app.schemas.copilot import SuggestedFlow
+from app.services import assistant_chat_service
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
+
+
+def _require_ai_enabled() -> None:
+    """Raise HTTP 503 unless ``settings.ai_enabled`` is truthy."""
+    if settings.ai_enabled:
+        return
+    message = "AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY."
+    raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=message)
+
+
+@router.post("/chats", response_model=ChatDetailResponse, status_code=201)
+@limiter.limit("10/minute")
+async def create_chat(
+    request: Request,
+    data: ChatCreateRequest,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    _: None = Depends(require_engineer_or_admin),
+):
+    """Create an empty chat owned by the caller and return it.
+
+    ``data`` is accepted as the request body but is not read here.
+    """
+    owner_id, account_id = current_user.id, current_user.account_id
+    new_chat = await assistant_chat_service.create_chat(user_id=owner_id, account_id=account_id, db=db)
+    await db.commit()
+    return ChatDetailResponse.model_validate(new_chat)
+
+
+@router.get("/chats", response_model=list[ChatListResponse])
+async def list_chats(
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    page: int = Query(1, ge=1),
+    size: int = Query(20, ge=1, le=100),
+):
+    """List the caller's chats: pinned first, then most recently updated.
+
+    ``AssistantChat.id`` is appended as a final sort key so rows that tie on
+    ``pinned`` and ``updated_at`` keep one order from page to page; without a
+    total order, OFFSET/LIMIT may repeat or skip rows.
+    """
+    ordering = (AssistantChat.pinned.desc(), AssistantChat.updated_at.desc(), AssistantChat.id)
+    query = select(AssistantChat).where(AssistantChat.user_id == current_user.id)
+    query = query.order_by(*ordering).offset((page - 1) * size).limit(size)
+    result = await db.execute(query)
+    return [ChatListResponse.model_validate(chat) for chat in result.scalars().all()]
+
+
+@router.get("/chats/{chat_id}", response_model=ChatDetailResponse)
+async def get_chat(
+    chat_id: UUID,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Return one of the caller's chats, including its messages.
+
+    Raises:
+        HTTPException: 404 when no chat has this id for the current user.
+    """
+    owned_by_caller = (AssistantChat.id == chat_id, AssistantChat.user_id == current_user.id)
+    rows = await db.execute(select(AssistantChat).where(*owned_by_caller))
+    found = rows.scalar_one_or_none()
+    if found:
+        return ChatDetailResponse.model_validate(found)
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
+
+
+@router.post("/chats/{chat_id}/messages", response_model=ChatMessageResponse)
+@limiter.limit("10/minute")
+async def post_message(
+    request: Request,
+    chat_id: UUID,
+    data: ChatMessageRequest,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    _: None = Depends(require_engineer_or_admin),
+):
+    """Send a user message to a chat and return the AI reply.
+
+    Checks the caller's AI quota, delegates to the chat service, then records
+    usage and commits. Responds 503 when AI is disabled, 429 on quota denial,
+    404 when the service raises ``ValueError`` and 502 for any other failure.
+    """
+    _require_ai_enabled()
+
+    allowed, quota_status = await check_ai_quota(
+        user_id=current_user.id,
+        account_id=current_user.account_id,
+        db=db,
+        billing_anchor=current_user.ai_billing_cycle_anchor_at,
+        is_super_admin=current_user.is_super_admin,
+    )
+    if not allowed:
+        # Fallback keeps a payload without "deny_reason" a 429 rather than a KeyError (500).
+        deny_reason = quota_status.get("deny_reason", "unknown")
+        reset_key = "daily_reset_at" if deny_reason == "daily" else "monthly_reset_at"
+        raise HTTPException(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            detail={
+                "message": f"AI limit exceeded ({deny_reason})",
+                "reset_at": quota_status.get(reset_key),
+                "quota": quota_status,
+            },
+        )
+
+    plan = await get_user_plan(current_user.account_id, db)
+    # Shared by both usage records. NOTE(review): counts_toward_quota=False despite the quota check above — confirm.
+    usage_common = dict(
+        user_id=current_user.id,
+        account_id=current_user.account_id,
+        conversation_id=None,
+        generation_type="assistant_message",
+        tier=plan,
+        counts_toward_quota=False,
+        extra_data={"assistant_chat_id": str(chat_id)},
+        db=db,
+    )
+
+    try:
+        ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
+            chat_id=chat_id,
+            user_id=current_user.id,
+            account_id=current_user.account_id,
+            message=data.message,
+            db=db,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
+    except Exception as e:
+        logger.exception("Assistant chat message failed: %s", e)
+        await record_ai_usage(
+            **usage_common,
+            input_tokens=0,
+            output_tokens=0,
+            estimated_cost=0,
+            succeeded=False,
+            error_code=type(e).__name__,
+        )
+        await db.commit()
+        detail = f"AI provider error ({type(e).__name__}). Please try again."
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) from e
+
+    # NOTE(review): the ``total_*`` names suggest running totals for the whole
+    # chat; if so this records cumulative, not per-message, usage — confirm.
+    input_tokens, output_tokens = chat.total_input_tokens, chat.total_output_tokens
+    await record_ai_usage(
+        **usage_common,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        estimated_cost=input_tokens * 1.0 / 1_000_000 + output_tokens * 5.0 / 1_000_000,
+        succeeded=True,
+        error_code=None,
+    )
+    await db.commit()
+
+    flows = [SuggestedFlow(**sf) for sf in suggested_flows]
+    return ChatMessageResponse(content=ai_content, suggested_flows=flows)
+
+
+@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse)
+async def update_chat(
+    chat_id: UUID,
+    data: ChatUpdateRequest,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Rename a chat and/or change its pinned flag.
+
+    Only fields supplied as non-``None`` are written. Raises 404 when the chat
+    does not exist for the current user.
+    """
+    lookup = select(AssistantChat).where(
+        AssistantChat.id == chat_id,
+        AssistantChat.user_id == current_user.id,
+    )
+    chat = (await db.execute(lookup)).scalar_one_or_none()
+    if not chat:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
+    for field in ("title", "pinned"):
+        new_value = getattr(data, field)
+        if new_value is not None:
+            setattr(chat, field, new_value)
+    await db.commit()
+    return ChatDetailResponse.model_validate(chat)
+
+
+@router.delete("/chats/{chat_id}", status_code=204)
+async def delete_chat(
+    chat_id: UUID,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Delete one chat belonging to the caller.
+
+    Raises:
+        HTTPException: 404 when no chat has this id for the current user.
+    """
+    ownership = (AssistantChat.id == chat_id, AssistantChat.user_id == current_user.id)
+    lookup = await db.execute(select(AssistantChat).where(*ownership))
+    target = lookup.scalar_one_or_none()
+    if not target:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
+
+    await db.delete(target)
+    await db.commit()
+
+
+@router.delete("/chats", status_code=204)
+async def bulk_delete_chats(
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    older_than_days: int = Query(..., ge=1),
+):
+    """Delete the caller's unpinned chats whose ``updated_at`` is over N days old (UTC cutoff)."""
+    threshold = datetime.now(timezone.utc) - timedelta(days=older_than_days)
+    stale_unpinned = (
+        AssistantChat.user_id == current_user.id,
+        AssistantChat.pinned == False,  # noqa: E712
+        AssistantChat.updated_at < threshold,
+    )
+    purge = delete(AssistantChat).where(*stale_unpinned)
+    await db.execute(purge)
+    await db.commit()
+
+
+@router.get("/retention", response_model=RetentionSettingsResponse)
+async def get_retention_settings(
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Return the chat retention settings stored on the caller's account.
+
+    Responds 404 when the caller's account row cannot be found.
+    """
+    account_query = select(Account).where(Account.id == current_user.account_id)
+    account = (await db.execute(account_query)).scalar_one_or_none()
+    if account:
+        return RetentionSettingsResponse(
+            chat_retention_days=account.chat_retention_days,
+            chat_retention_max_count=account.chat_retention_max_count,
+        )
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")
+
+
+@router.patch("/retention", response_model=RetentionSettingsResponse)
+async def update_retention_settings(
+    data: RetentionSettingsUpdate,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Change the account's chat retention settings and return the new values.
+
+    Only non-``None`` fields are written. Responds 404 when the account is
+    missing and 403 unless the caller is the account owner or a super admin.
+    """
+    account_query = select(Account).where(Account.id == current_user.account_id)
+    account = (await db.execute(account_query)).scalar_one_or_none()
+    if not account:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")
+    may_edit = account.owner_id == current_user.id or current_user.is_super_admin
+    if not may_edit:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Only the account owner can update retention settings",
+        )
+
+    for field in ("chat_retention_days", "chat_retention_max_count"):
+        requested = getattr(data, field)
+        if requested is not None:
+            setattr(account, field, requested)
+    await db.commit()
+    return RetentionSettingsResponse(
+        chat_retention_days=account.chat_retention_days,
+        chat_retention_max_count=account.chat_retention_max_count,
+    )
--- a/backend/app/api/endpoints/copilot.py
+++ b/backend/app/api/endpoints/copilot.py
@@ -0,0 +1,190 @@
+"""In-session copilot endpoints.
+
+Contextual AI assistant during flow navigation:
+  POST /copilot/conversations        — Start conversation (requires tree_id)
+  POST /copilot/conversations/{id}/messages — Send message, get response + suggestions
+  GET  /copilot/conversations/{id}   — Get conversation history
+"""
+import logging
+from typing import Annotated
+from uuid import UUID
+
+from fastapi import APIRouter, Depends, HTTPException, Request, status
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.rate_limit import limiter
+from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
+from app.core.config import settings
+from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
+from app.models.user import User
+from app.schemas.copilot import (
+    CopilotStartRequest,
+    CopilotStartResponse,
+    CopilotMessageRequest,
+    CopilotMessageResponse,
+    CopilotConversationResponse,
+    SuggestedFlow,
+)
+from app.services import copilot_service
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/copilot", tags=["copilot"])
+
+
+def _require_ai_enabled() -> None:
+    """Reject the request with HTTP 503 when ``settings.ai_enabled`` is falsy."""
+    if settings.ai_enabled:
+        return
+    reason = "AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY."
+    raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=reason)
+
+
+@router.post("/conversations", response_model=CopilotStartResponse, status_code=201)
+@limiter.limit("10/minute")
+async def start_conversation(
+    request: Request,
+    data: CopilotStartRequest,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    _: None = Depends(require_engineer_or_admin),
+):
+    """Start a copilot conversation for a flow and return its greeting.
+
+    Responds 503 when AI is disabled, 429 when the quota check denies the call,
+    404 when the service raises ``ValueError`` and 502 for any other failure.
+    """
+    _require_ai_enabled()
+
+    allowed, quota_status = await check_ai_quota(
+        user_id=current_user.id,
+        account_id=current_user.account_id,
+        db=db,
+        billing_anchor=current_user.ai_billing_cycle_anchor_at,
+        is_super_admin=current_user.is_super_admin,
+    )
+    if not allowed:
+        # Fallback keeps a payload without "deny_reason" a 429 rather than a KeyError (500).
+        deny_reason = quota_status.get("deny_reason", "unknown")
+        reset_key = "daily_reset_at" if deny_reason == "daily" else "monthly_reset_at"
+        raise HTTPException(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            detail={
+                "message": f"AI limit exceeded ({deny_reason})",
+                "reset_at": quota_status.get(reset_key),
+                "quota": quota_status,
+            },
+        )
+
+    try:
+        conversation, greeting = await copilot_service.start_conversation(
+            user_id=current_user.id,
+            account_id=current_user.account_id,
+            tree_id=data.tree_id,
+            session_id=data.session_id,
+            current_node_id=data.current_node_id,
+            db=db,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
+    except Exception as e:
+        logger.exception("Copilot conversation start failed: %s", e)
+        detail = f"AI provider error ({type(e).__name__}). Please try again."
+        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=detail) from e
+
+    await db.commit()
+    return CopilotStartResponse(conversation_id=conversation.id, greeting=greeting)
+
+
+@router.post("/conversations/{conversation_id}/messages", response_model=CopilotMessageResponse)
+@limiter.limit("10/minute")
+async def post_message(
+    request: Request,
+    conversation_id: UUID,
+    data: CopilotMessageRequest,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+    _: None = Depends(require_engineer_or_admin),
+):
+    """Send a message within a copilot conversation and return the AI reply.
+
+    A usage row is recorded for both outcomes (always with zero tokens/cost and
+    ``counts_toward_quota=False``) and committed before responding.
+
+    Raises:
+        HTTPException: 503 when AI is disabled; 404 when the service raises
+            ``ValueError``; 502 for any other service failure.
+    """
+    _require_ai_enabled()
+    # NOTE(review): unlike start_conversation, no check_ai_quota call is made
+    # here, and the success record below reports zero tokens — confirm intended.
+
+    plan = await get_user_plan(current_user.account_id, db)
+    # Fields shared by the success and failure usage records.
+    usage_record = dict(
+        user_id=current_user.id,
+        account_id=current_user.account_id,
+        conversation_id=None,
+        generation_type="copilot_message",
+        tier=plan,
+        input_tokens=0,
+        output_tokens=0,
+        estimated_cost=0,
+        counts_toward_quota=False,
+        extra_data={"copilot_conversation_id": str(conversation_id)},
+        db=db,
+    )
+
+    try:
+        ai_content, suggested_flows = await copilot_service.send_message(
+            conversation_id=conversation_id,
+            user_id=current_user.id,
+            message=data.message,
+            current_node_id=data.current_node_id,
+            db=db,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
+    except Exception as e:
+        logger.exception("Copilot message failed: %s", e)
+        await record_ai_usage(
+            **usage_record,
+            succeeded=False,
+            error_code=type(e).__name__,
+        )
+        await db.commit()
+        raise HTTPException(
+            status_code=status.HTTP_502_BAD_GATEWAY,
+            detail=f"AI provider error ({type(e).__name__}). Please try again.",
+        ) from e
+
+    await record_ai_usage(**usage_record, succeeded=True, error_code=None)
+    await db.commit()
+
+    return CopilotMessageResponse(
+        content=ai_content,
+        suggested_flows=[SuggestedFlow(**sf) for sf in suggested_flows],
+    )
+
+
+@router.get("/conversations/{conversation_id}", response_model=CopilotConversationResponse)
+async def get_conversation(
+    conversation_id: UUID,
+    current_user: Annotated[User, Depends(get_current_active_user)],
+    db: Annotated[AsyncSession, Depends(get_db)],
+):
+    """Return one of the caller's copilot conversations.
+
+    Raises:
+        HTTPException: 404 when no conversation has this id for the current user.
+    """
+    # Function-scope imports: resolved on each call rather than at module import.
+    from sqlalchemy import select
+    from app.models.copilot_conversation import CopilotConversation
+
+    ownership = (CopilotConversation.id == conversation_id, CopilotConversation.user_id == current_user.id)
+    rows = await db.execute(select(CopilotConversation).where(*ownership))
+    found = rows.scalar_one_or_none()
+    if found:
+        return CopilotConversationResponse.model_validate(found)
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Conversation not found")
--- a/backend/app/api/endpoints/trees.py
+++ b/backend/app/api/endpoints/trees.py
@@ -1,3 +1,4 @@
+import logging
 from datetime import datetime, timezone
 from typing import Annotated, Optional
 from uuid import UUID
@@ -29,6 +30,7 @@ from app.core.audit import log_audit
 from app.core.config import settings
 from app.core.tree_validation import can_publish_tree
 from app.core.step_sync import sync_steps_from_tree, deactivate_synced_steps_for_tree
+from app.services.rag_service import index_tree as rag_index_tree

 router = APIRouter(prefix="/trees", tags=["trees"])

@@ -542,6 +544,13 @@ async def create_tree(
    )
    tree = result.scalar_one()

+    # Index tree for RAG (best-effort, don't fail the request)
+    try:
+        await rag_index_tree(tree.id, db)
+        await db.commit()
+    except Exception:
+        logging.getLogger(__name__).warning("RAG indexing failed for tree %s", tree.id)
+
    return build_full_tree_response(tree)


@@ -725,6 +734,13 @@ async def update_tree(
    )
    tree = result.scalar_one()

+    # Re-index tree for RAG (best-effort)
+    try:
+        await rag_index_tree(tree.id, db)
+        await db.commit()
+    except Exception:
+        logging.getLogger(__name__).warning("RAG re-indexing failed for tree %s", tree.id)
+
    return build_full_tree_response(tree)


--- a/backend/app/api/router.py
+++ b/backend/app/api/router.py
@@ -8,6 +8,8 @@ from app.api.endpoints import feedback
 from app.api.endpoints import ai_builder
 from app.api.endpoints import ai_fix
 from app.api.endpoints import ai_chat
+from app.api.endpoints import copilot
+from app.api.endpoints import assistant_chat

 api_router = APIRouter()

@@ -40,3 +42,5 @@ api_router.include_router(feedback.router)
 api_router.include_router(ai_builder.router)
 api_router.include_router(ai_fix.router)
 api_router.include_router(ai_chat.router)
+api_router.include_router(copilot.router)
+api_router.include_router(assistant_chat.router)