feat: add AI assistant with in-session copilot and standalone chat with RAG

Implements a three-phase AI assistant feature:
- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration,
  tree chunking service, and semantic search over team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual
  AI help, current step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history,
  pin/delete, and configurable retention policies (account-level)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michael Chihlas
2026-03-04 01:36:36 -05:00
parent 41cb7956cb
commit 1aa60dada2
44 changed files with 3080 additions and 14 deletions

View File

@@ -0,0 +1,320 @@
"""Standalone AI assistant chat endpoints.
POST /assistant/chats — Create new chat
GET /assistant/chats — List chats (paginated, newest first)
GET /assistant/chats/{id} — Get chat with messages
POST /assistant/chats/{id}/messages — Send message
PATCH /assistant/chats/{id} — Update title, pin/unpin
DELETE /assistant/chats/{id} — Delete single chat
DELETE /assistant/chats — Bulk delete (older_than_days query param)
GET /assistant/retention — Get account retention settings
PATCH /assistant/retention — Update retention settings (owner only)
"""
import logging
from datetime import datetime, timezone, timedelta
from typing import Annotated, Optional
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from sqlalchemy import select, delete, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.rate_limit import limiter
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
from app.models.user import User
from app.models.account import Account
from app.models.assistant_chat import AssistantChat
from app.schemas.assistant_chat import (
ChatCreateRequest,
ChatMessageRequest,
ChatMessageResponse,
ChatListResponse,
ChatDetailResponse,
ChatUpdateRequest,
RetentionSettingsResponse,
RetentionSettingsUpdate,
)
from app.schemas.copilot import SuggestedFlow
from app.services import assistant_chat_service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router holding every endpoint defined in this module; paths are prefixed with /assistant.
router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
def _require_ai_enabled() -> None:
    """Reject the request when AI support is switched off.

    Raises:
        HTTPException: 503 Service Unavailable when ``settings.ai_enabled``
            is falsy.
    """
    if settings.ai_enabled:
        return
    raise HTTPException(
        detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
    )
@router.post("/chats", response_model=ChatDetailResponse, status_code=201)
@limiter.limit("10/minute")
async def create_chat(
    request: Request,
    data: ChatCreateRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
):
    """Open a fresh, empty conversation for the calling user.

    The chat is created through ``assistant_chat_service.create_chat`` using
    the caller's user and account ids, then committed.

    Args:
        request: Incoming request; not read by this body (presumably needed
            by the rate-limit decorator — confirm against app.core.rate_limit).
        data: Request body; not read by this body.
        current_user: Authenticated user who will own the chat.
        db: Async database session.

    Returns:
        The new chat serialised as ``ChatDetailResponse``.
    """
    new_chat = await assistant_chat_service.create_chat(
        db=db,
        account_id=current_user.account_id,
        user_id=current_user.id,
    )
    await db.commit()
    payload = ChatDetailResponse.model_validate(new_chat)
    return payload
@router.get("/chats", response_model=list[ChatListResponse])
async def list_chats(
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    page: int = Query(1, ge=1),
    size: int = Query(20, ge=1, le=100),
):
    """Return one page of the caller's chats.

    Rows are ordered with pinned chats first, then by ``updated_at``
    descending.

    Args:
        current_user: Authenticated user whose chats are listed.
        db: Async database session.
        page: 1-based page number.
        size: Page size (1 to 100).

    Returns:
        A list of ``ChatListResponse`` items for the requested page.
    """
    page_query = (
        select(AssistantChat)
        .where(AssistantChat.user_id == current_user.id)
        .order_by(AssistantChat.pinned.desc(), AssistantChat.updated_at.desc())
        .limit(size)
        .offset(size * (page - 1))
    )
    rows = (await db.execute(page_query)).scalars().all()
    return [ChatListResponse.model_validate(row) for row in rows]
@router.get("/chats/{chat_id}", response_model=ChatDetailResponse)
async def get_chat(
    chat_id: UUID,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Fetch a single chat belonging to the caller.

    Args:
        chat_id: Id of the chat to load.
        current_user: Authenticated user; the chat must be owned by them.
        db: Async database session.

    Returns:
        The chat serialised as ``ChatDetailResponse``.

    Raises:
        HTTPException: 404 when no chat with this id is owned by the caller.
    """
    owned_chat_query = select(AssistantChat).where(
        AssistantChat.id == chat_id,
        AssistantChat.user_id == current_user.id,
    )
    chat = (await db.execute(owned_chat_query)).scalar_one_or_none()
    if chat:
        return ChatDetailResponse.model_validate(chat)
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
@router.post("/chats/{chat_id}/messages", response_model=ChatMessageResponse)
@limiter.limit("10/minute")
async def post_message(
    request: Request,
    chat_id: UUID,
    data: ChatMessageRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
):
    """Send a message and get AI response.

    Steps: verify AI is configured, check the caller's AI quota, delegate to
    ``assistant_chat_service.send_message``, then write one usage row with
    ``record_ai_usage`` (on success and on unexpected failure) and commit.

    Args:
        request: Incoming request; not read by this body (presumably needed
            by the rate-limit decorator — confirm against app.core.rate_limit).
        chat_id: Id of the chat the message is posted to.
        data: Request body; only ``data.message`` is used.
        current_user: Authenticated user sending the message.
        db: Async database session.

    Returns:
        ``ChatMessageResponse`` with the AI reply and suggested flows.

    Raises:
        HTTPException: 503 when AI is not configured; 429 when the quota
            check denies the request; 404 when the service raises
            ``ValueError``; 502 for any other exception from the service.
    """
    _require_ai_enabled()

    allowed, quota_status = await check_ai_quota(
        user_id=current_user.id,
        account_id=current_user.account_id,
        db=db,
        billing_anchor=current_user.ai_billing_cycle_anchor_at,
        is_super_admin=current_user.is_super_admin,
    )
    if not allowed:
        # Read the reason once via .get(): a denial without "deny_reason"
        # must still produce the 429 below rather than a KeyError (500).
        deny_reason = quota_status.get("deny_reason")
        reset_key = "daily_reset_at" if deny_reason == "daily" else "monthly_reset_at"
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "message": f"AI limit exceeded ({deny_reason or 'unknown'})",
                "reset_at": quota_status.get(reset_key),
                "quota": quota_status,
            },
        )

    plan = await get_user_plan(current_user.account_id, db)

    # Fields shared by the success and failure usage records.
    usage_common = {
        "user_id": current_user.id,
        "account_id": current_user.account_id,
        "conversation_id": None,
        "generation_type": "assistant_message",
        "tier": plan,
        "counts_toward_quota": False,
        "extra_data": {"assistant_chat_id": str(chat_id)},
        "db": db,
    }

    try:
        ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
            chat_id=chat_id,
            user_id=current_user.id,
            account_id=current_user.account_id,
            message=data.message,
            db=db,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    except Exception as e:
        logger.exception("Assistant chat message failed: %s", e)
        # Record the failed attempt (zero tokens) before reporting the error.
        await record_ai_usage(
            input_tokens=0,
            output_tokens=0,
            estimated_cost=0,
            succeeded=False,
            error_code=type(e).__name__,
            **usage_common,
        )
        await db.commit()
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"AI provider error ({type(e).__name__}). Please try again.",
        ) from e

    # NOTE(review): total_input_tokens / total_output_tokens look like running
    # totals for the whole chat; if so, every usage row re-counts earlier
    # messages. Confirm against assistant_chat_service.send_message.
    # NOTE(review): 1.0 and 5.0 are presumably prices per million input/output
    # tokens — confirm the unit and the provider they apply to.
    await record_ai_usage(
        input_tokens=chat.total_input_tokens,
        output_tokens=chat.total_output_tokens,
        estimated_cost=(
            chat.total_input_tokens * 1.0 / 1_000_000
            + chat.total_output_tokens * 5.0 / 1_000_000
        ),
        succeeded=True,
        error_code=None,
        **usage_common,
    )
    await db.commit()

    return ChatMessageResponse(
        content=ai_content,
        suggested_flows=[SuggestedFlow(**sf) for sf in suggested_flows],
    )
@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse)
async def update_chat(
    chat_id: UUID,
    data: ChatUpdateRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Rename a chat and/or change its pinned flag.

    Only fields whose value in ``data`` is not ``None`` are applied.

    Args:
        chat_id: Id of the chat to modify.
        data: Optional new ``title`` and/or ``pinned`` value.
        current_user: Authenticated user; the chat must be owned by them.
        db: Async database session.

    Returns:
        The updated chat serialised as ``ChatDetailResponse``.

    Raises:
        HTTPException: 404 when no chat with this id is owned by the caller.
    """
    owned_chat_query = select(AssistantChat).where(
        AssistantChat.id == chat_id,
        AssistantChat.user_id == current_user.id,
    )
    chat = (await db.execute(owned_chat_query)).scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")

    # Apply each patchable field only when the client supplied a value.
    for field_name in ("title", "pinned"):
        new_value = getattr(data, field_name)
        if new_value is not None:
            setattr(chat, field_name, new_value)

    await db.commit()
    return ChatDetailResponse.model_validate(chat)
@router.delete("/chats/{chat_id}", status_code=204)
async def delete_chat(
    chat_id: UUID,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Remove one chat owned by the caller.

    Args:
        chat_id: Id of the chat to delete.
        current_user: Authenticated user; the chat must be owned by them.
        db: Async database session.

    Raises:
        HTTPException: 404 when no chat with this id is owned by the caller.
    """
    owned_chat_query = select(AssistantChat).where(
        AssistantChat.id == chat_id,
        AssistantChat.user_id == current_user.id,
    )
    chat = (await db.execute(owned_chat_query)).scalar_one_or_none()
    if not chat:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")

    await db.delete(chat)
    await db.commit()
@router.delete("/chats", status_code=204)
async def bulk_delete_chats(
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    older_than_days: int = Query(..., ge=1),
):
    """Bulk delete chats older than N days (skips pinned).

    Deletes the caller's unpinned chats whose ``updated_at`` is earlier than
    ``now (UTC) - older_than_days`` and commits.

    Args:
        current_user: Authenticated user whose chats are purged.
        db: Async database session.
        older_than_days: Age threshold in days (at least 1, no upper bound).
    """
    try:
        cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
    except OverflowError:
        # older_than_days is unbounded above; a huge value overflows timedelta
        # or pushes the cutoff before the earliest representable datetime.
        # No chat can be that old, so there is nothing to delete.
        return

    await db.execute(
        delete(AssistantChat).where(
            AssistantChat.user_id == current_user.id,
            AssistantChat.pinned == False,  # noqa: E712
            AssistantChat.updated_at < cutoff,
        )
    )
    await db.commit()
@router.get("/retention", response_model=RetentionSettingsResponse)
async def get_retention_settings(
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Read the chat retention settings of the caller's account.

    Args:
        current_user: Authenticated user; their ``account_id`` selects the account.
        db: Async database session.

    Returns:
        ``RetentionSettingsResponse`` with the account's retention values.

    Raises:
        HTTPException: 404 when the caller's account row does not exist.
    """
    account_query = select(Account).where(Account.id == current_user.account_id)
    account = (await db.execute(account_query)).scalar_one_or_none()
    if account:
        return RetentionSettingsResponse(
            chat_retention_days=account.chat_retention_days,
            chat_retention_max_count=account.chat_retention_max_count,
        )
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")
@router.patch("/retention", response_model=RetentionSettingsResponse)
async def update_retention_settings(
    data: RetentionSettingsUpdate,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Change the chat retention settings of the caller's account.

    Allowed for the account owner and for super admins. Only fields whose
    value in ``data`` is not ``None`` are applied.

    Args:
        data: Optional new ``chat_retention_days`` / ``chat_retention_max_count``.
        current_user: Authenticated user requesting the change.
        db: Async database session.

    Returns:
        ``RetentionSettingsResponse`` with the values after the update.

    Raises:
        HTTPException: 404 when the caller's account row does not exist;
            403 when the caller is neither the owner nor a super admin.
    """
    account_query = select(Account).where(Account.id == current_user.account_id)
    account = (await db.execute(account_query)).scalar_one_or_none()
    if not account:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")

    is_owner = account.owner_id == current_user.id
    if not (is_owner or current_user.is_super_admin):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Only the account owner can update retention settings",
        )

    # Copy over only the settings the client actually supplied.
    for setting_name in ("chat_retention_days", "chat_retention_max_count"):
        requested = getattr(data, setting_name)
        if requested is not None:
            setattr(account, setting_name, requested)

    await db.commit()
    return RetentionSettingsResponse(
        chat_retention_days=account.chat_retention_days,
        chat_retention_max_count=account.chat_retention_max_count,
    )

View File

@@ -0,0 +1,190 @@
"""In-session copilot endpoints.
Contextual AI assistant during flow navigation:
POST /copilot/conversations — Start conversation (requires tree_id)
POST /copilot/conversations/{id}/messages — Send message, get response + suggestions
GET /copilot/conversations/{id} — Get conversation history
"""
import logging
from typing import Annotated
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.rate_limit import limiter
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
from app.models.user import User
from app.schemas.copilot import (
CopilotStartRequest,
CopilotStartResponse,
CopilotMessageRequest,
CopilotMessageResponse,
CopilotConversationResponse,
SuggestedFlow,
)
from app.services import copilot_service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router holding every endpoint defined in this module; paths are prefixed with /copilot.
router = APIRouter(prefix="/copilot", tags=["copilot"])
def _require_ai_enabled() -> None:
    """Guard for AI-backed endpoints: fail fast when AI is not configured.

    Raises:
        HTTPException: 503 Service Unavailable when ``settings.ai_enabled``
            is falsy.
    """
    ai_ready = settings.ai_enabled
    if ai_ready:
        return
    raise HTTPException(
        detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
    )
@router.post("/conversations", response_model=CopilotStartResponse, status_code=201)
@limiter.limit("10/minute")
async def start_conversation(
    request: Request,
    data: CopilotStartRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
):
    """Start a new copilot conversation for a flow.

    Steps: verify AI is configured, check the caller's AI quota, delegate to
    ``copilot_service.start_conversation``, then commit.

    Args:
        request: Incoming request; not read by this body (presumably needed
            by the rate-limit decorator — confirm against app.core.rate_limit).
        data: Request body supplying ``tree_id``, ``session_id`` and
            ``current_node_id``.
        current_user: Authenticated user starting the conversation.
        db: Async database session.

    Returns:
        ``CopilotStartResponse`` with the new conversation id and greeting.

    Raises:
        HTTPException: 503 when AI is not configured; 429 when the quota
            check denies the request; 404 when the service raises
            ``ValueError``; 502 for any other exception from the service.
    """
    _require_ai_enabled()

    allowed, quota_status = await check_ai_quota(
        user_id=current_user.id,
        account_id=current_user.account_id,
        db=db,
        billing_anchor=current_user.ai_billing_cycle_anchor_at,
        is_super_admin=current_user.is_super_admin,
    )
    if not allowed:
        # Read the reason once via .get(): a denial without "deny_reason"
        # must still produce the 429 below rather than a KeyError (500).
        deny_reason = quota_status.get("deny_reason")
        reset_key = "daily_reset_at" if deny_reason == "daily" else "monthly_reset_at"
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "message": f"AI limit exceeded ({deny_reason or 'unknown'})",
                "reset_at": quota_status.get(reset_key),
                "quota": quota_status,
            },
        )

    try:
        conversation, greeting = await copilot_service.start_conversation(
            user_id=current_user.id,
            account_id=current_user.account_id,
            tree_id=data.tree_id,
            session_id=data.session_id,
            current_node_id=data.current_node_id,
            db=db,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    except Exception as e:
        logger.exception("Copilot conversation start failed: %s", e)
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"AI provider error ({type(e).__name__}). Please try again.",
        ) from e

    await db.commit()

    return CopilotStartResponse(
        conversation_id=conversation.id,
        greeting=greeting,
    )
@router.post("/conversations/{conversation_id}/messages", response_model=CopilotMessageResponse)
@limiter.limit("10/minute")
async def post_message(
    request: Request,
    conversation_id: UUID,
    data: CopilotMessageRequest,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
    _: None = Depends(require_engineer_or_admin),
):
    """Send a message and get AI response with flow suggestions.

    Steps: verify AI is configured, check the caller's AI quota (the same
    gate ``start_conversation`` applies), delegate to
    ``copilot_service.send_message``, then write one usage row with
    ``record_ai_usage`` (on success and on unexpected failure) and commit.

    Args:
        request: Incoming request; not read by this body (presumably needed
            by the rate-limit decorator — confirm against app.core.rate_limit).
        conversation_id: Id of the copilot conversation being continued.
        data: Request body supplying ``message`` and ``current_node_id``.
        current_user: Authenticated user sending the message.
        db: Async database session.

    Returns:
        ``CopilotMessageResponse`` with the AI reply and suggested flows.

    Raises:
        HTTPException: 503 when AI is not configured; 429 when the quota
            check denies the request; 404 when the service raises
            ``ValueError``; 502 for any other exception from the service.
    """
    _require_ai_enabled()

    # Gate every AI call on the quota, not only the conversation start;
    # otherwise an open conversation could keep sending messages after the
    # quota check would deny a new one.
    allowed, quota_status = await check_ai_quota(
        user_id=current_user.id,
        account_id=current_user.account_id,
        db=db,
        billing_anchor=current_user.ai_billing_cycle_anchor_at,
        is_super_admin=current_user.is_super_admin,
    )
    if not allowed:
        deny_reason = quota_status.get("deny_reason")
        reset_key = "daily_reset_at" if deny_reason == "daily" else "monthly_reset_at"
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail={
                "message": f"AI limit exceeded ({deny_reason or 'unknown'})",
                "reset_at": quota_status.get(reset_key),
                "quota": quota_status,
            },
        )

    plan = await get_user_plan(current_user.account_id, db)

    # Fields shared by the success and failure usage records. Token counts
    # and cost are recorded as 0 in both cases because send_message returns
    # only the reply text and the suggestions here.
    usage_common = {
        "user_id": current_user.id,
        "account_id": current_user.account_id,
        "conversation_id": None,
        "generation_type": "copilot_message",
        "tier": plan,
        "input_tokens": 0,
        "output_tokens": 0,
        "estimated_cost": 0,
        "counts_toward_quota": False,
        "extra_data": {"copilot_conversation_id": str(conversation_id)},
        "db": db,
    }

    try:
        ai_content, suggested_flows = await copilot_service.send_message(
            conversation_id=conversation_id,
            user_id=current_user.id,
            message=data.message,
            current_node_id=data.current_node_id,
            db=db,
        )
    except ValueError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    except Exception as e:
        logger.exception("Copilot message failed: %s", e)
        # Record the failed attempt before reporting the error.
        await record_ai_usage(
            succeeded=False,
            error_code=type(e).__name__,
            **usage_common,
        )
        await db.commit()
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"AI provider error ({type(e).__name__}). Please try again.",
        ) from e

    await record_ai_usage(
        succeeded=True,
        error_code=None,
        **usage_common,
    )
    await db.commit()

    return CopilotMessageResponse(
        content=ai_content,
        suggested_flows=[SuggestedFlow(**sf) for sf in suggested_flows],
    )
@router.get("/conversations/{conversation_id}", response_model=CopilotConversationResponse)
async def get_conversation(
    conversation_id: UUID,
    current_user: Annotated[User, Depends(get_current_active_user)],
    db: Annotated[AsyncSession, Depends(get_db)],
):
    """Fetch one copilot conversation belonging to the caller.

    Args:
        conversation_id: Id of the conversation to load.
        current_user: Authenticated user; the conversation must be owned by them.
        db: Async database session.

    Returns:
        The conversation serialised as ``CopilotConversationResponse``.

    Raises:
        HTTPException: 404 when no conversation with this id is owned by the caller.
    """
    # Imported at call time, as in the rest of this handler's history;
    # neither name is imported at module level in this file.
    from sqlalchemy import select
    from app.models.copilot_conversation import CopilotConversation

    owned_conversation_query = select(CopilotConversation).where(
        CopilotConversation.id == conversation_id,
        CopilotConversation.user_id == current_user.id,
    )
    conversation = (await db.execute(owned_conversation_query)).scalar_one_or_none()
    if conversation:
        return CopilotConversationResponse.model_validate(conversation)
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Conversation not found")

View File

@@ -1,3 +1,4 @@
import logging
from datetime import datetime, timezone
from typing import Annotated, Optional
from uuid import UUID
@@ -29,6 +30,7 @@ from app.core.audit import log_audit
from app.core.config import settings
from app.core.tree_validation import can_publish_tree
from app.core.step_sync import sync_steps_from_tree, deactivate_synced_steps_for_tree
from app.services.rag_service import index_tree as rag_index_tree
router = APIRouter(prefix="/trees", tags=["trees"])
@@ -542,6 +544,13 @@ async def create_tree(
)
tree = result.scalar_one()
# Index tree for RAG (best-effort, don't fail the request)
try:
await rag_index_tree(tree.id, db)
await db.commit()
except Exception:
logging.getLogger(__name__).warning("RAG indexing failed for tree %s", tree.id)
return build_full_tree_response(tree)
@@ -725,6 +734,13 @@ async def update_tree(
)
tree = result.scalar_one()
# Re-index tree for RAG (best-effort)
try:
await rag_index_tree(tree.id, db)
await db.commit()
except Exception:
logging.getLogger(__name__).warning("RAG re-indexing failed for tree %s", tree.id)
return build_full_tree_response(tree)

View File

@@ -8,6 +8,8 @@ from app.api.endpoints import feedback
from app.api.endpoints import ai_builder
from app.api.endpoints import ai_fix
from app.api.endpoints import ai_chat
from app.api.endpoints import copilot
from app.api.endpoints import assistant_chat
api_router = APIRouter()
@@ -40,3 +42,5 @@ api_router.include_router(feedback.router)
api_router.include_router(ai_builder.router)
api_router.include_router(ai_fix.router)
api_router.include_router(ai_chat.router)
api_router.include_router(copilot.router)
api_router.include_router(assistant_chat.router)