feat: add AI assistant with in-session copilot and standalone chat with RAG

Implements three-phase AI assistant feature:
- Phase 0: RAG infrastructure with pgvector embeddings, Voyage AI integration,
  tree chunking service, and semantic search over team's flow library
- Phase 1: In-session copilot panel during flow navigation with contextual
  AI help, current step awareness, and suggested related flows
- Phase 2: Standalone AI chat page with persistent conversation history,
  pin/delete, and configurable retention policies (account-level)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Michael Chihlas
2026-03-04 01:36:36 -05:00
parent 41cb7956cb
commit 1aa60dada2
44 changed files with 3080 additions and 14 deletions

View File

@@ -10,6 +10,10 @@ from alembic import context
# Import your models
from app.core.database import Base
from app.models import User, Team, Tree, Session, Attachment, InviteCode
from app.models.email_verification_token import EmailVerificationToken
from app.models.tree_embedding import TreeEmbedding
from app.models.copilot_conversation import CopilotConversation
from app.models.assistant_chat import AssistantChat
from app.core.config import settings
# this is the Alembic Config object

View File

@@ -0,0 +1,51 @@
"""Add pgvector extension and tree_embeddings table.
Revision ID: 042
Revises: 041
Create Date: 2026-03-04
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers
revision: str = "042"
down_revision: str = "041"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Enable pgvector extension
op.execute("CREATE EXTENSION IF NOT EXISTS vector")
op.create_table(
"tree_embeddings",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
sa.Column("tree_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("trees.id", ondelete="CASCADE"), nullable=False),
sa.Column("account_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=True),
sa.Column("chunk_type", sa.String(30), nullable=False),
sa.Column("node_type", sa.String(30), nullable=True),
sa.Column("node_id", sa.String(100), nullable=True),
sa.Column("chunk_text", sa.Text(), nullable=False),
sa.Column("embedding_model", sa.String(50), nullable=False, server_default="voyage-3.5"),
sa.Column("embedding", sa.Column.__class__, nullable=True), # placeholder, replaced below
sa.Column("meta", postgresql.JSONB(), nullable=False, server_default=sa.text("'{}'::jsonb")),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
)
# Drop the placeholder embedding column and add the vector column
op.drop_column("tree_embeddings", "embedding")
op.execute("ALTER TABLE tree_embeddings ADD COLUMN embedding vector(1024)")
# Indexes
op.create_index("ix_tree_embeddings_account_id", "tree_embeddings", ["account_id"])
op.create_index("ix_tree_embeddings_tree_id", "tree_embeddings", ["tree_id"])
def downgrade() -> None:
op.drop_table("tree_embeddings")
op.execute("DROP EXTENSION IF EXISTS vector")

View File

@@ -0,0 +1,39 @@
"""Add copilot_conversations table.
Revision ID: 043
Revises: 042
Create Date: 2026-03-04
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision: str = "043"
down_revision: str = "042"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.create_table(
"copilot_conversations",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("account_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("session_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("sessions.id", ondelete="SET NULL"), nullable=True),
sa.Column("tree_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("trees.id", ondelete="CASCADE"), nullable=False),
sa.Column("messages", postgresql.JSONB(), nullable=False, server_default=sa.text("'[]'::jsonb")),
sa.Column("current_node_id", sa.String(100), nullable=True),
sa.Column("message_count", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("total_input_tokens", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("total_output_tokens", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
)
def downgrade() -> None:
op.drop_table("copilot_conversations")

View File

@@ -0,0 +1,37 @@
"""Add assistant_chats table.
Revision ID: 044
Revises: 043
Create Date: 2026-03-04
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision: str = "044"
down_revision: str = "043"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.create_table(
"assistant_chats",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True, server_default=sa.text("gen_random_uuid()")),
sa.Column("user_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("account_id", postgresql.UUID(as_uuid=True), sa.ForeignKey("accounts.id", ondelete="CASCADE"), nullable=False, index=True),
sa.Column("title", sa.String(255), nullable=False, server_default="New Chat"),
sa.Column("messages", postgresql.JSONB(), nullable=False, server_default=sa.text("'[]'::jsonb")),
sa.Column("message_count", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("total_input_tokens", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("total_output_tokens", sa.Integer(), nullable=False, server_default=sa.text("0")),
sa.Column("pinned", sa.Boolean(), nullable=False, server_default=sa.text("false")),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.text("now()")),
)
def downgrade() -> None:
op.drop_table("assistant_chats")

View File

@@ -0,0 +1,31 @@
"""Add chat retention settings to accounts.
Revision ID: 045
Revises: 044
Create Date: 2026-03-04
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
revision: str = "045"
down_revision: str = "044"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.add_column(
"accounts",
sa.Column("chat_retention_days", sa.Integer(), nullable=True, server_default=sa.text("90")),
)
op.add_column(
"accounts",
sa.Column("chat_retention_max_count", sa.Integer(), nullable=True, server_default=sa.text("100")),
)
def downgrade() -> None:
op.drop_column("accounts", "chat_retention_max_count")
op.drop_column("accounts", "chat_retention_days")

View File

@@ -0,0 +1,320 @@
"""Standalone AI assistant chat endpoints.
POST /assistant/chats — Create new chat
GET /assistant/chats — List chats (paginated, newest first)
GET /assistant/chats/{id} — Get chat with messages
POST /assistant/chats/{id}/messages — Send message
PATCH /assistant/chats/{id} — Update title, pin/unpin
DELETE /assistant/chats/{id} — Delete single chat
DELETE /assistant/chats — Bulk delete (older_than_days query param)
GET /assistant/retention — Get account retention settings
PATCH /assistant/retention — Update retention settings (owner only)
"""
import logging
from datetime import datetime, timezone, timedelta
from typing import Annotated, Optional
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from sqlalchemy import select, delete, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.rate_limit import limiter
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
from app.models.user import User
from app.models.account import Account
from app.models.assistant_chat import AssistantChat
from app.schemas.assistant_chat import (
ChatCreateRequest,
ChatMessageRequest,
ChatMessageResponse,
ChatListResponse,
ChatDetailResponse,
ChatUpdateRequest,
RetentionSettingsResponse,
RetentionSettingsUpdate,
)
from app.schemas.copilot import SuggestedFlow
from app.services import assistant_chat_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/assistant", tags=["assistant-chat"])
def _require_ai_enabled() -> None:
if not settings.ai_enabled:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
)
@router.post("/chats", response_model=ChatDetailResponse, status_code=201)
@limiter.limit("10/minute")
async def create_chat(
request: Request,
data: ChatCreateRequest,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
_: None = Depends(require_engineer_or_admin),
):
"""Create a new empty chat conversation."""
chat = await assistant_chat_service.create_chat(
user_id=current_user.id,
account_id=current_user.account_id,
db=db,
)
await db.commit()
return ChatDetailResponse.model_validate(chat)
@router.get("/chats", response_model=list[ChatListResponse])
async def list_chats(
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
page: int = Query(1, ge=1),
size: int = Query(20, ge=1, le=100),
):
"""List user's chat conversations (newest first, pinned on top)."""
offset = (page - 1) * size
result = await db.execute(
select(AssistantChat)
.where(AssistantChat.user_id == current_user.id)
.order_by(AssistantChat.pinned.desc(), AssistantChat.updated_at.desc())
.offset(offset)
.limit(size)
)
chats = result.scalars().all()
return [ChatListResponse.model_validate(c) for c in chats]
@router.get("/chats/{chat_id}", response_model=ChatDetailResponse)
async def get_chat(
chat_id: UUID,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Get a chat with full message history."""
result = await db.execute(
select(AssistantChat).where(
AssistantChat.id == chat_id,
AssistantChat.user_id == current_user.id,
)
)
chat = result.scalar_one_or_none()
if not chat:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
return ChatDetailResponse.model_validate(chat)
@router.post("/chats/{chat_id}/messages", response_model=ChatMessageResponse)
@limiter.limit("10/minute")
async def post_message(
request: Request,
chat_id: UUID,
data: ChatMessageRequest,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
_: None = Depends(require_engineer_or_admin),
):
"""Send a message and get AI response."""
_require_ai_enabled()
allowed, quota_status = await check_ai_quota(
user_id=current_user.id,
account_id=current_user.account_id,
db=db,
billing_anchor=current_user.ai_billing_cycle_anchor_at,
is_super_admin=current_user.is_super_admin,
)
if not allowed:
reset_key = "daily_reset_at" if quota_status.get("deny_reason") == "daily" else "monthly_reset_at"
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail={
"message": f"AI limit exceeded ({quota_status['deny_reason']})",
"reset_at": quota_status.get(reset_key),
"quota": quota_status,
},
)
plan = await get_user_plan(current_user.account_id, db)
try:
ai_content, suggested_flows, chat = await assistant_chat_service.send_message(
chat_id=chat_id,
user_id=current_user.id,
account_id=current_user.account_id,
message=data.message,
db=db,
)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
except Exception as e:
logger.exception("Assistant chat message failed: %s", e)
await record_ai_usage(
user_id=current_user.id,
account_id=current_user.account_id,
conversation_id=None,
generation_type="assistant_message",
tier=plan,
input_tokens=0,
output_tokens=0,
estimated_cost=0,
succeeded=False,
counts_toward_quota=False,
error_code=type(e).__name__,
extra_data={"assistant_chat_id": str(chat_id)},
db=db,
)
await db.commit()
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.",
)
await record_ai_usage(
user_id=current_user.id,
account_id=current_user.account_id,
conversation_id=None,
generation_type="assistant_message",
tier=plan,
input_tokens=chat.total_input_tokens,
output_tokens=chat.total_output_tokens,
estimated_cost=(
chat.total_input_tokens * 1.0 / 1_000_000
+ chat.total_output_tokens * 5.0 / 1_000_000
),
succeeded=True,
counts_toward_quota=False,
error_code=None,
extra_data={"assistant_chat_id": str(chat_id)},
db=db,
)
await db.commit()
return ChatMessageResponse(
content=ai_content,
suggested_flows=[SuggestedFlow(**sf) for sf in suggested_flows],
)
@router.patch("/chats/{chat_id}", response_model=ChatDetailResponse)
async def update_chat(
chat_id: UUID,
data: ChatUpdateRequest,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Update chat title or pin/unpin."""
result = await db.execute(
select(AssistantChat).where(
AssistantChat.id == chat_id,
AssistantChat.user_id == current_user.id,
)
)
chat = result.scalar_one_or_none()
if not chat:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
if data.title is not None:
chat.title = data.title
if data.pinned is not None:
chat.pinned = data.pinned
await db.commit()
return ChatDetailResponse.model_validate(chat)
@router.delete("/chats/{chat_id}", status_code=204)
async def delete_chat(
chat_id: UUID,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Delete a single chat."""
result = await db.execute(
select(AssistantChat).where(
AssistantChat.id == chat_id,
AssistantChat.user_id == current_user.id,
)
)
chat = result.scalar_one_or_none()
if not chat:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chat not found")
await db.delete(chat)
await db.commit()
@router.delete("/chats", status_code=204)
async def bulk_delete_chats(
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
older_than_days: int = Query(..., ge=1),
):
"""Bulk delete chats older than N days (skips pinned)."""
cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
await db.execute(
delete(AssistantChat).where(
AssistantChat.user_id == current_user.id,
AssistantChat.pinned == False, # noqa: E712
AssistantChat.updated_at < cutoff,
)
)
await db.commit()
@router.get("/retention", response_model=RetentionSettingsResponse)
async def get_retention_settings(
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Get account chat retention settings."""
result = await db.execute(
select(Account).where(Account.id == current_user.account_id)
)
account = result.scalar_one_or_none()
if not account:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")
return RetentionSettingsResponse(
chat_retention_days=account.chat_retention_days,
chat_retention_max_count=account.chat_retention_max_count,
)
@router.patch("/retention", response_model=RetentionSettingsResponse)
async def update_retention_settings(
data: RetentionSettingsUpdate,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Update account chat retention settings (account owner only)."""
result = await db.execute(
select(Account).where(Account.id == current_user.account_id)
)
account = result.scalar_one_or_none()
if not account:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Account not found")
if account.owner_id != current_user.id and not current_user.is_super_admin:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Only the account owner can update retention settings",
)
if data.chat_retention_days is not None:
account.chat_retention_days = data.chat_retention_days
if data.chat_retention_max_count is not None:
account.chat_retention_max_count = data.chat_retention_max_count
await db.commit()
return RetentionSettingsResponse(
chat_retention_days=account.chat_retention_days,
chat_retention_max_count=account.chat_retention_max_count,
)

View File

@@ -0,0 +1,190 @@
"""In-session copilot endpoints.
Contextual AI assistant during flow navigation:
POST /copilot/conversations — Start conversation (requires tree_id)
POST /copilot/conversations/{id}/messages — Send message, get response + suggestions
GET /copilot/conversations/{id} — Get conversation history
"""
import logging
from typing import Annotated
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Request, status
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.rate_limit import limiter
from app.api.deps import get_current_active_user, get_db, require_engineer_or_admin
from app.core.config import settings
from app.core.ai_quota_service import check_ai_quota, record_ai_usage, get_user_plan
from app.models.user import User
from app.schemas.copilot import (
CopilotStartRequest,
CopilotStartResponse,
CopilotMessageRequest,
CopilotMessageResponse,
CopilotConversationResponse,
SuggestedFlow,
)
from app.services import copilot_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/copilot", tags=["copilot"])
def _require_ai_enabled() -> None:
if not settings.ai_enabled:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="AI is not configured. Set GOOGLE_AI_API_KEY or ANTHROPIC_API_KEY.",
)
@router.post("/conversations", response_model=CopilotStartResponse, status_code=201)
@limiter.limit("10/minute")
async def start_conversation(
request: Request,
data: CopilotStartRequest,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
_: None = Depends(require_engineer_or_admin),
):
"""Start a new copilot conversation for a flow."""
_require_ai_enabled()
allowed, quota_status = await check_ai_quota(
user_id=current_user.id,
account_id=current_user.account_id,
db=db,
billing_anchor=current_user.ai_billing_cycle_anchor_at,
is_super_admin=current_user.is_super_admin,
)
if not allowed:
reset_key = "daily_reset_at" if quota_status.get("deny_reason") == "daily" else "monthly_reset_at"
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail={
"message": f"AI limit exceeded ({quota_status['deny_reason']})",
"reset_at": quota_status.get(reset_key),
"quota": quota_status,
},
)
try:
conversation, greeting = await copilot_service.start_conversation(
user_id=current_user.id,
account_id=current_user.account_id,
tree_id=data.tree_id,
session_id=data.session_id,
current_node_id=data.current_node_id,
db=db,
)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
except Exception as e:
logger.exception("Copilot conversation start failed: %s", e)
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.",
)
await db.commit()
return CopilotStartResponse(
conversation_id=conversation.id,
greeting=greeting,
)
@router.post("/conversations/{conversation_id}/messages", response_model=CopilotMessageResponse)
@limiter.limit("10/minute")
async def post_message(
request: Request,
conversation_id: UUID,
data: CopilotMessageRequest,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
_: None = Depends(require_engineer_or_admin),
):
"""Send a message and get AI response with flow suggestions."""
_require_ai_enabled()
plan = await get_user_plan(current_user.account_id, db)
try:
ai_content, suggested_flows = await copilot_service.send_message(
conversation_id=conversation_id,
user_id=current_user.id,
message=data.message,
current_node_id=data.current_node_id,
db=db,
)
except ValueError as e:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
except Exception as e:
logger.exception("Copilot message failed: %s", e)
await record_ai_usage(
user_id=current_user.id,
account_id=current_user.account_id,
conversation_id=None,
generation_type="copilot_message",
tier=plan,
input_tokens=0,
output_tokens=0,
estimated_cost=0,
succeeded=False,
counts_toward_quota=False,
error_code=type(e).__name__,
extra_data={"copilot_conversation_id": str(conversation_id)},
db=db,
)
await db.commit()
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY,
detail=f"AI provider error ({type(e).__name__}). Please try again.",
)
await record_ai_usage(
user_id=current_user.id,
account_id=current_user.account_id,
conversation_id=None,
generation_type="copilot_message",
tier=plan,
input_tokens=0,
output_tokens=0,
estimated_cost=0,
succeeded=True,
counts_toward_quota=False,
error_code=None,
extra_data={"copilot_conversation_id": str(conversation_id)},
db=db,
)
await db.commit()
return CopilotMessageResponse(
content=ai_content,
suggested_flows=[SuggestedFlow(**sf) for sf in suggested_flows],
)
@router.get("/conversations/{conversation_id}", response_model=CopilotConversationResponse)
async def get_conversation(
conversation_id: UUID,
current_user: Annotated[User, Depends(get_current_active_user)],
db: Annotated[AsyncSession, Depends(get_db)],
):
"""Get copilot conversation history."""
from sqlalchemy import select
from app.models.copilot_conversation import CopilotConversation
result = await db.execute(
select(CopilotConversation).where(
CopilotConversation.id == conversation_id,
CopilotConversation.user_id == current_user.id,
)
)
conversation = result.scalar_one_or_none()
if not conversation:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Conversation not found")
return CopilotConversationResponse.model_validate(conversation)

View File

@@ -1,3 +1,4 @@
import logging
from datetime import datetime, timezone
from typing import Annotated, Optional
from uuid import UUID
@@ -29,6 +30,7 @@ from app.core.audit import log_audit
from app.core.config import settings
from app.core.tree_validation import can_publish_tree
from app.core.step_sync import sync_steps_from_tree, deactivate_synced_steps_for_tree
from app.services.rag_service import index_tree as rag_index_tree
router = APIRouter(prefix="/trees", tags=["trees"])
@@ -542,6 +544,13 @@ async def create_tree(
)
tree = result.scalar_one()
# Index tree for RAG (best-effort, don't fail the request)
try:
await rag_index_tree(tree.id, db)
await db.commit()
except Exception:
logging.getLogger(__name__).warning("RAG indexing failed for tree %s", tree.id)
return build_full_tree_response(tree)
@@ -725,6 +734,13 @@ async def update_tree(
)
tree = result.scalar_one()
# Re-index tree for RAG (best-effort)
try:
await rag_index_tree(tree.id, db)
await db.commit()
except Exception:
logging.getLogger(__name__).warning("RAG re-indexing failed for tree %s", tree.id)
return build_full_tree_response(tree)

View File

@@ -8,6 +8,8 @@ from app.api.endpoints import feedback
from app.api.endpoints import ai_builder
from app.api.endpoints import ai_fix
from app.api.endpoints import ai_chat
from app.api.endpoints import copilot
from app.api.endpoints import assistant_chat
api_router = APIRouter()
@@ -40,3 +42,5 @@ api_router.include_router(feedback.router)
api_router.include_router(ai_builder.router)
api_router.include_router(ai_fix.router)
api_router.include_router(ai_chat.router)
api_router.include_router(copilot.router)
api_router.include_router(assistant_chat.router)

View File

@@ -115,7 +115,7 @@ async def check_ai_quota(
select(func.count(AIUsage.id)).where(
AIUsage.user_id == user_id,
AIUsage.succeeded == True, # noqa: E712
AIUsage.generation_type.in_(["scaffold", "branch_detail", "chat_message", "chat_generate"]),
AIUsage.generation_type.in_(["scaffold", "branch_detail", "chat_message", "chat_generate", "copilot_message", "assistant_message"]),
AIUsage.created_at >= day_start,
)
) or 0

View File

@@ -84,6 +84,11 @@ class Settings(BaseSettings):
AI_MODEL_GEMINI: str = "gemini-2.5-flash"
AI_MODEL_ANTHROPIC: str = "claude-haiku-4-5-20251001"
# Embedding / RAG
VOYAGE_API_KEY: Optional[str] = None
EMBEDDING_MODEL: str = "voyage-3.5"
EMBEDDING_DIMENSIONS: int = 1024
@property
def ai_enabled(self) -> bool:
"""Check if any AI provider is configured."""

View File

@@ -14,6 +14,7 @@ from app.core.middleware import RequestLoggingMiddleware, ErrorLoggingMiddleware
from app.core.rate_limit import limiter
from app.api.router import api_router
from app.core.scheduler import scheduler, load_all_schedules, _cleanup_expired_ai_conversations
from app.services.retention_cleanup import cleanup_expired_chats
from app.core.service_account import ensure_service_account
# Initialize logging configuration
@@ -122,6 +123,15 @@ async def lifespan(app: FastAPI):
replace_existing=True,
)
# Chat retention cleanup (daily)
scheduler.add_job(
cleanup_expired_chats,
trigger="interval",
hours=24,
id="cleanup_expired_chats",
replace_existing=True,
)
# Auto-seed trees in background on PR environments
seed_task = None
if settings.SEED_ON_DEPLOY:

View File

@@ -29,6 +29,9 @@ from .feedback import Feedback
from .ai_conversation import AIConversation
from .ai_usage import AIUsage
from .ai_chat_session import AIChatSession
from .tree_embedding import TreeEmbedding
from .copilot_conversation import CopilotConversation
from .assistant_chat import AssistantChat
__all__ = [
"User",
@@ -69,4 +72,7 @@ __all__ = [
"AIConversation",
"AIUsage",
"AIChatSession",
"TreeEmbedding",
"CopilotConversation",
"AssistantChat",
]

View File

@@ -1,7 +1,7 @@
import uuid
from datetime import datetime, timezone
from typing import Optional, TYPE_CHECKING
from sqlalchemy import String, DateTime, ForeignKey, Boolean
from sqlalchemy import String, DateTime, ForeignKey, Boolean, Integer
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.dialects.postgresql import UUID
from app.core.database import Base
@@ -35,6 +35,14 @@ class Account(Base):
comment="Policy: engineers can create public shares. Only affects NEW shares (grandfathered)."
)
# Chat retention settings
chat_retention_days: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, default=90, server_default="90"
)
chat_retention_max_count: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, default=100, server_default="100"
)
# Relationships
owner: Mapped["User"] = relationship("User", foreign_keys=[owner_id], back_populates="owned_account")
users: Mapped[list["User"]] = relationship("User", foreign_keys="[User.account_id]", back_populates="account")

View File

@@ -0,0 +1,59 @@
"""Standalone AI assistant chat model.
Persistent conversation history for general IT questions with RAG context.
"""
import uuid
from datetime import datetime, timezone
from typing import Optional, Any
from sqlalchemy import String, DateTime, ForeignKey, Integer, Boolean
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, JSONB
from app.core.database import Base
class AssistantChat(Base):
__tablename__ = "assistant_chats"
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
user_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
account_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("accounts.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
title: Mapped[str] = mapped_column(
String(255), nullable=False, default="New Chat"
)
messages: Mapped[list[dict[str, Any]]] = mapped_column(
JSONB, nullable=False, default=list
)
message_count: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
total_input_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
total_output_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
pinned: Mapped[bool] = mapped_column(
Boolean, nullable=False, default=False
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)

View File

@@ -0,0 +1,69 @@
"""Copilot conversation model for in-session AI assistant.
Tracks conversation state during flow navigation with contextual AI help.
"""
import uuid
from datetime import datetime, timezone
from typing import Optional, Any
from sqlalchemy import String, DateTime, ForeignKey, Integer
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, JSONB
from app.core.database import Base
class CopilotConversation(Base):
__tablename__ = "copilot_conversations"
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
user_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("users.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
account_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("accounts.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
session_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True),
ForeignKey("sessions.id", ondelete="SET NULL"),
nullable=True,
)
tree_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("trees.id", ondelete="CASCADE"),
nullable=False,
)
messages: Mapped[list[dict[str, Any]]] = mapped_column(
JSONB, nullable=False, default=list
)
current_node_id: Mapped[Optional[str]] = mapped_column(
String(100), nullable=True
)
message_count: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
total_input_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
total_output_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)
expires_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False
)

View File

@@ -0,0 +1,72 @@
"""Tree embedding storage for RAG-powered AI assistant.
Stores vector embeddings of tree content chunks for semantic search.
Each tree is split into multiple chunks (node, solution, tree_summary)
and embedded via Voyage AI for cosine similarity retrieval.
"""
import uuid
from datetime import datetime, timezone
from typing import Optional, Any
from sqlalchemy import String, Text, DateTime, ForeignKey, Index
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, JSONB
from app.core.database import Base
# pgvector column type — imported at runtime to avoid import errors
# when pgvector is not installed locally
try:
from pgvector.sqlalchemy import Vector
except ImportError:
Vector = None
class TreeEmbedding(Base):
__tablename__ = "tree_embeddings"
__table_args__ = (
Index("ix_tree_embeddings_account_id", "account_id"),
Index("ix_tree_embeddings_tree_id", "tree_id"),
)
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
tree_id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True),
ForeignKey("trees.id", ondelete="CASCADE"),
nullable=False,
)
account_id: Mapped[Optional[uuid.UUID]] = mapped_column(
UUID(as_uuid=True),
ForeignKey("accounts.id", ondelete="CASCADE"),
nullable=True,
)
chunk_type: Mapped[str] = mapped_column(
String(30),
nullable=False,
comment="node | solution | tree_summary",
)
node_type: Mapped[Optional[str]] = mapped_column(
String(30), nullable=True
)
node_id: Mapped[Optional[str]] = mapped_column(
String(100), nullable=True
)
chunk_text: Mapped[str] = mapped_column(Text, nullable=False)
embedding_model: Mapped[str] = mapped_column(
String(50), nullable=False, default="voyage-3.5"
)
# The embedding column is created via migration with vector(1024) type
# We store it as a generic column here and handle it in queries
meta: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, default=dict
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc),
)

View File

@@ -0,0 +1,59 @@
"""Pydantic schemas for standalone AI assistant chat."""
from typing import Optional, Any
from uuid import UUID
from datetime import datetime
from pydantic import BaseModel, Field
from app.schemas.copilot import SuggestedFlow
class ChatCreateRequest(BaseModel):
"""Empty body — creates a new blank conversation."""
pass
class ChatMessageRequest(BaseModel):
message: str = Field(..., min_length=1, max_length=8000)
class ChatMessageResponse(BaseModel):
content: str
suggested_flows: list[SuggestedFlow] = []
class ChatListResponse(BaseModel):
id: UUID
title: str
message_count: int
pinned: bool
created_at: datetime
updated_at: datetime
model_config = {"from_attributes": True}
class ChatDetailResponse(BaseModel):
id: UUID
title: str
messages: list[dict[str, Any]]
message_count: int
pinned: bool
created_at: datetime
updated_at: datetime
model_config = {"from_attributes": True}
class ChatUpdateRequest(BaseModel):
title: Optional[str] = Field(None, min_length=1, max_length=255)
pinned: Optional[bool] = None
class RetentionSettingsResponse(BaseModel):
chat_retention_days: Optional[int]
chat_retention_max_count: Optional[int]
class RetentionSettingsUpdate(BaseModel):
chat_retention_days: Optional[int] = Field(None, ge=1, le=365)
chat_retention_max_count: Optional[int] = Field(None, ge=10, le=10000)

View File

@@ -0,0 +1,44 @@
"""Pydantic schemas for the in-session copilot."""
from typing import Optional, Any
from uuid import UUID
from datetime import datetime
from pydantic import BaseModel, Field
class SuggestedFlow(BaseModel):
tree_id: UUID
tree_name: str
tree_type: str
relevance_snippet: str
class CopilotStartRequest(BaseModel):
tree_id: UUID
session_id: Optional[UUID] = None
current_node_id: Optional[str] = None
class CopilotStartResponse(BaseModel):
conversation_id: UUID
greeting: str
class CopilotMessageRequest(BaseModel):
message: str = Field(..., min_length=1, max_length=4000)
current_node_id: Optional[str] = None
class CopilotMessageResponse(BaseModel):
content: str
suggested_flows: list[SuggestedFlow] = []
class CopilotConversationResponse(BaseModel):
id: UUID
tree_id: UUID
messages: list[dict[str, Any]]
current_node_id: Optional[str] = None
message_count: int
created_at: datetime
model_config = {"from_attributes": True}

View File

@@ -0,0 +1,152 @@
"""Standalone AI assistant chat service with RAG context.
Provides persistent conversation history for general IT questions
with semantic search over the team's flow library.
"""
import logging
from typing import Optional, Any
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.ai_provider import get_ai_provider
from app.models.assistant_chat import AssistantChat
from app.services import rag_service
logger = logging.getLogger(__name__)
ASSISTANT_SYSTEM_PROMPT = """You are a Senior Systems and Network Engineer with 15+ years of experience working in Managed Service Provider (MSP) environments. You specialize in:
- Windows Server, Active Directory, Group Policy, and Hybrid Identity (Entra ID)
- Networking (TCP/IP, DNS, DHCP, VPN, firewall troubleshooting, Cisco/Fortinet)
- Virtualization (VMware, Hyper-V) and cloud platforms (Azure, AWS, M365)
- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya)
- PowerShell scripting and automation
When answering:
- Be direct and actionable — MSP engineers need fast, practical answers
- Include specific commands, paths, and config values when relevant
- Mention potential risks or gotchas before suggesting changes
- If a relevant troubleshooting flow exists in the team's library, reference it
- Keep responses concise but thorough — prefer bullet points and code blocks
- Format code with proper markdown code blocks
"""
def _build_rag_context(rag_results: list[dict[str, Any]]) -> str:
"""Format RAG results into a system prompt section."""
if not rag_results:
return ""
parts = ["\n--- RELEVANT FLOWS FROM TEAM LIBRARY ---"]
for r in rag_results[:5]:
parts.append(f"- [{r['tree_type']}] {r['tree_name']}: {r['chunk_text'][:200]}")
return "\n".join(parts)
def _extract_suggested_flows(rag_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Extract unique suggested flows from RAG results."""
seen: set[str] = set()
suggestions = []
for r in rag_results:
tid = r["tree_id"]
if tid in seen or r["similarity"] < 0.3:
continue
seen.add(tid)
suggestions.append({
"tree_id": tid,
"tree_name": r["tree_name"],
"tree_type": r["tree_type"],
"relevance_snippet": r["chunk_text"][:150],
})
return suggestions[:3]
def _auto_title(message: str) -> str:
"""Generate a short title from the first user message."""
title = message.strip()[:100]
if len(message) > 100:
title = title.rsplit(" ", 1)[0] + "..."
return title
async def create_chat(
user_id: UUID,
account_id: UUID,
db: AsyncSession,
) -> AssistantChat:
"""Create a new empty chat."""
chat = AssistantChat(
user_id=user_id,
account_id=account_id,
messages=[],
)
db.add(chat)
await db.flush()
return chat
async def send_message(
chat_id: UUID,
user_id: UUID,
account_id: UUID,
message: str,
db: AsyncSession,
) -> tuple[str, list[dict[str, Any]], AssistantChat]:
"""Send a user message and get AI response.
Returns (ai_content, suggested_flows, chat).
"""
result = await db.execute(
select(AssistantChat).where(
AssistantChat.id == chat_id,
AssistantChat.user_id == user_id,
)
)
chat = result.scalar_one_or_none()
if not chat:
raise ValueError("Chat not found")
# Auto-title from first message
if chat.message_count == 0:
chat.title = _auto_title(message)
# RAG search
rag_results = await rag_service.search(
query=message,
account_id=account_id,
db=db,
limit=8,
)
# Build system prompt
system_prompt = ASSISTANT_SYSTEM_PROMPT + _build_rag_context(rag_results)
# Build messages for AI
ai_messages = []
for msg in chat.messages:
if msg["role"] in ("user", "assistant"):
ai_messages.append({"role": msg["role"], "content": msg["content"]})
ai_messages.append({"role": "user", "content": message})
# Call AI
provider = get_ai_provider()
ai_content, input_tokens, output_tokens = await provider.generate_text(
system_prompt=system_prompt,
messages=ai_messages,
max_tokens=4096,
)
# Update chat
msgs = list(chat.messages)
msgs.append({"role": "user", "content": message})
msgs.append({"role": "assistant", "content": ai_content})
chat.messages = msgs
chat.message_count += 2
chat.total_input_tokens += input_tokens
chat.total_output_tokens += output_tokens
suggested_flows = _extract_suggested_flows(rag_results)
return ai_content, suggested_flows, chat

View File

@@ -0,0 +1,241 @@
"""Copilot service — in-session AI assistant with RAG context.
Builds system prompts with current flow context and RAG results,
manages conversation state, and returns AI responses with flow suggestions.
"""
import logging
from datetime import datetime, timezone, timedelta
from typing import Optional, Any
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.core.ai_provider import get_ai_provider
from app.models.tree import Tree
from app.models.copilot_conversation import CopilotConversation
from app.services import rag_service
logger = logging.getLogger(__name__)
COPILOT_SYSTEM_PROMPT = """You are a Senior Systems and Network Engineer with 15+ years of experience working in Managed Service Provider (MSP) environments. You specialize in:
- Windows Server, Active Directory, Group Policy, and Hybrid Identity (Entra ID)
- Networking (TCP/IP, DNS, DHCP, VPN, firewall troubleshooting, Cisco/Fortinet)
- Virtualization (VMware, Hyper-V) and cloud platforms (Azure, AWS, M365)
- Endpoint management, RMM tools, and PSA platforms (ConnectWise, Datto, Kaseya)
- PowerShell scripting and automation
You are acting as an in-session copilot while the user navigates a troubleshooting or procedural flow. You can see the flow context and their current position.
When answering:
- Be direct and actionable — MSP engineers need fast, practical answers
- Include specific commands, paths, and config values when relevant
- Mention potential risks or gotchas before suggesting changes
- If a relevant troubleshooting flow exists in the team's library, reference it
- Keep responses concise but thorough — prefer bullet points and code blocks
"""
def _build_flow_context(tree: Tree, current_node_id: Optional[str]) -> str:
"""Build flow context string for the system prompt."""
parts = [
f"\n--- CURRENT FLOW CONTEXT ---",
f"Flow: {tree.name}",
f"Type: {tree.tree_type}",
]
if tree.description:
parts.append(f"Description: {tree.description}")
if current_node_id and tree.tree_structure:
node = _find_node(tree.tree_structure, current_node_id)
if node:
parts.append(f"Current node type: {node.get('type', 'unknown')}")
parts.append(f"Current node: {node.get('content', node.get('label', 'Unknown'))}")
# Add options if it's a question/decision node
children = node.get("children", [])
if children and isinstance(children, list):
option_labels = [
c.get("label", c.get("content", ""))
for c in children if isinstance(c, dict)
]
if option_labels:
parts.append(f"Available options: {', '.join(option_labels)}")
return "\n".join(parts)
def _find_node(structure: dict, node_id: str) -> Optional[dict]:
"""Recursively find a node by ID in tree structure."""
if structure.get("id") == node_id:
return structure
for child in structure.get("children", []):
if isinstance(child, dict):
found = _find_node(child, node_id)
if found:
return found
# Check steps array for procedural flows
for step in structure.get("steps", []):
if isinstance(step, dict):
found = _find_node(step, node_id)
if found:
return found
return None
def _build_rag_context(rag_results: list[dict[str, Any]]) -> str:
"""Format RAG results into a system prompt section."""
if not rag_results:
return ""
parts = ["\n--- RELEVANT FLOWS FROM TEAM LIBRARY ---"]
for r in rag_results[:5]: # Cap at 5 for prompt size
parts.append(f"- [{r['tree_type']}] {r['tree_name']}: {r['chunk_text'][:200]}")
return "\n".join(parts)
def _extract_suggested_flows(
rag_results: list[dict[str, Any]],
exclude_tree_id: Optional[UUID] = None,
) -> list[dict[str, Any]]:
"""Extract unique suggested flows from RAG results."""
seen_tree_ids: set[str] = set()
suggestions = []
for r in rag_results:
tid = r["tree_id"]
if exclude_tree_id and tid == str(exclude_tree_id):
continue
if tid in seen_tree_ids:
continue
if r["similarity"] < 0.3:
continue
seen_tree_ids.add(tid)
suggestions.append({
"tree_id": tid,
"tree_name": r["tree_name"],
"tree_type": r["tree_type"],
"relevance_snippet": r["chunk_text"][:150],
})
return suggestions[:3]
async def start_conversation(
user_id: UUID,
account_id: UUID,
tree_id: UUID,
session_id: Optional[UUID],
current_node_id: Optional[str],
db: AsyncSession,
) -> tuple[CopilotConversation, str]:
"""Start a new copilot conversation.
Returns (conversation, greeting_message).
"""
# Load tree
result = await db.execute(
select(Tree).options(selectinload(Tree.tags)).where(Tree.id == tree_id)
)
tree = result.scalar_one_or_none()
if not tree:
raise ValueError(f"Tree {tree_id} not found")
conversation = CopilotConversation(
user_id=user_id,
account_id=account_id,
tree_id=tree_id,
session_id=session_id,
current_node_id=current_node_id,
messages=[],
expires_at=datetime.now(timezone.utc) + timedelta(hours=24),
)
db.add(conversation)
await db.flush()
greeting = f"I'm your copilot for this **{tree.tree_type}** flow: **{tree.name}**. Ask me anything about the current step, alternative approaches, or related troubleshooting tips."
conversation.messages = [{"role": "assistant", "content": greeting}]
conversation.message_count = 1
return conversation, greeting
async def send_message(
conversation_id: UUID,
user_id: UUID,
message: str,
current_node_id: Optional[str],
db: AsyncSession,
) -> tuple[str, list[dict[str, Any]]]:
"""Send a user message and get AI response.
Returns (ai_content, suggested_flows).
"""
result = await db.execute(
select(CopilotConversation).where(
CopilotConversation.id == conversation_id,
CopilotConversation.user_id == user_id,
)
)
conversation = result.scalar_one_or_none()
if not conversation:
raise ValueError("Conversation not found")
if conversation.expires_at < datetime.now(timezone.utc):
raise ValueError("Conversation has expired")
# Load tree for context
tree_result = await db.execute(
select(Tree).options(selectinload(Tree.tags)).where(Tree.id == conversation.tree_id)
)
tree = tree_result.scalar_one_or_none()
if not tree:
raise ValueError("Associated flow not found")
# Update current node
if current_node_id:
conversation.current_node_id = current_node_id
# RAG search
rag_results = await rag_service.search(
query=message,
account_id=conversation.account_id,
db=db,
limit=8,
)
# Build system prompt
system_prompt = COPILOT_SYSTEM_PROMPT
system_prompt += _build_flow_context(tree, conversation.current_node_id)
system_prompt += _build_rag_context(rag_results)
# Build messages for AI
ai_messages = []
for msg in conversation.messages:
if msg["role"] in ("user", "assistant"):
ai_messages.append({"role": msg["role"], "content": msg["content"]})
ai_messages.append({"role": "user", "content": message})
# Call AI
provider = get_ai_provider()
ai_content, input_tokens, output_tokens = await provider.generate_text(
system_prompt=system_prompt,
messages=ai_messages,
max_tokens=2048,
)
# Update conversation
msgs = list(conversation.messages)
msgs.append({"role": "user", "content": message})
msgs.append({"role": "assistant", "content": ai_content})
conversation.messages = msgs
conversation.message_count += 2
conversation.total_input_tokens += input_tokens
conversation.total_output_tokens += output_tokens
# Extract suggested flows
suggested_flows = _extract_suggested_flows(rag_results, exclude_tree_id=tree.id)
return ai_content, suggested_flows

View File

@@ -0,0 +1,78 @@
"""Embedding provider abstraction for RAG.
Uses Voyage AI (voyage-3.5, 1024 dims) as the embedding provider.
Supports document and query input types for asymmetric search.
"""
import logging
from typing import Optional
from app.core.config import settings
logger = logging.getLogger(__name__)
async def get_embedding(
text: str,
input_type: str = "document",
) -> Optional[list[float]]:
"""Get embedding vector for text using Voyage AI.
Args:
text: The text to embed.
input_type: "document" for indexing, "query" for search queries.
Returns:
List of floats (1024 dims) or None if embedding service unavailable.
"""
if not settings.VOYAGE_API_KEY:
logger.warning("VOYAGE_API_KEY not set — embedding service unavailable")
return None
try:
import voyageai
client = voyageai.AsyncClient(api_key=settings.VOYAGE_API_KEY)
result = await client.embed(
texts=[text],
model=settings.EMBEDDING_MODEL,
input_type=input_type,
)
return result.embeddings[0]
except Exception as e:
logger.error("Embedding failed: %s", e)
return None
async def get_embeddings_batch(
texts: list[str],
input_type: str = "document",
) -> Optional[list[list[float]]]:
"""Get embedding vectors for multiple texts in a single API call.
Args:
texts: List of texts to embed.
input_type: "document" for indexing, "query" for search queries.
Returns:
List of embedding vectors or None if service unavailable.
"""
if not texts:
return []
if not settings.VOYAGE_API_KEY:
logger.warning("VOYAGE_API_KEY not set — embedding service unavailable")
return None
try:
import voyageai
client = voyageai.AsyncClient(api_key=settings.VOYAGE_API_KEY)
result = await client.embed(
texts=texts,
model=settings.EMBEDDING_MODEL,
input_type=input_type,
)
return result.embeddings
except Exception as e:
logger.error("Batch embedding failed: %s", e)
return None

View File

@@ -0,0 +1,170 @@
"""RAG service — index trees and search embeddings for AI context.
Orchestrates tree chunking, embedding, and semantic search over the
team's flow library via pgvector cosine similarity.
"""
import logging
from typing import Optional, Any
from uuid import UUID
from sqlalchemy import text, delete
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.tree import Tree
from app.models.tree_embedding import TreeEmbedding
from app.services.embedding_service import get_embedding, get_embeddings_batch
from app.services.tree_chunker import chunk_tree
logger = logging.getLogger(__name__)
async def index_tree(tree_id: UUID, db: AsyncSession) -> int:
"""Chunk and embed a tree, storing results in tree_embeddings.
Deletes existing embeddings for this tree before re-indexing.
Returns the number of chunks indexed.
"""
from sqlalchemy import select
from sqlalchemy.orm import selectinload
result = await db.execute(
select(Tree)
.options(selectinload(Tree.tags))
.where(Tree.id == tree_id)
)
tree = result.scalar_one_or_none()
if not tree:
logger.warning("index_tree: tree %s not found", tree_id)
return 0
# Delete existing embeddings
await db.execute(
delete(TreeEmbedding).where(TreeEmbedding.tree_id == tree_id)
)
# Chunk the tree
tag_names = [t.name for t in tree.tags] if tree.tags else []
chunks = chunk_tree(
tree_name=tree.name,
tree_type=tree.tree_type,
description=tree.description,
tags=tag_names,
tree_structure=tree.tree_structure,
)
if not chunks:
logger.info("index_tree: no chunks for tree %s", tree_id)
return 0
# Get embeddings for all chunks in batch
texts = [c["chunk_text"] for c in chunks]
embeddings = await get_embeddings_batch(texts, input_type="document")
if embeddings is None:
logger.warning("index_tree: embedding service unavailable for tree %s", tree_id)
return 0
# Insert embeddings
for chunk, embedding in zip(chunks, embeddings):
embedding_str = "[" + ",".join(str(v) for v in embedding) + "]"
await db.execute(
text("""
INSERT INTO tree_embeddings
(tree_id, account_id, chunk_type, node_type, node_id, chunk_text, embedding_model, embedding, meta)
VALUES
(:tree_id, :account_id, :chunk_type, :node_type, :node_id, :chunk_text, :embedding_model, :embedding::vector, :meta::jsonb)
"""),
{
"tree_id": str(tree_id),
"account_id": str(tree.account_id) if tree.account_id else None,
"chunk_type": chunk["chunk_type"],
"node_type": chunk.get("node_type"),
"node_id": chunk.get("node_id"),
"chunk_text": chunk["chunk_text"],
"embedding_model": "voyage-3.5",
"embedding": embedding_str,
"meta": "{}",
},
)
logger.info("index_tree: indexed %d chunks for tree %s", len(chunks), tree_id)
return len(chunks)
async def delete_tree_embeddings(tree_id: UUID, db: AsyncSession) -> None:
"""Delete all embeddings for a tree."""
await db.execute(
delete(TreeEmbedding).where(TreeEmbedding.tree_id == tree_id)
)
async def search(
query: str,
account_id: UUID,
db: AsyncSession,
limit: int = 8,
exclude_tree_id: Optional[UUID] = None,
) -> list[dict[str, Any]]:
"""Semantic search over team's flow library.
Args:
query: Natural language search query.
account_id: Scope search to team's flows.
db: Database session.
limit: Max results to return.
exclude_tree_id: Exclude chunks from this tree (for copilot context).
Returns:
List of dicts with tree_id, tree_name, tree_type, chunk_text, chunk_type, similarity.
"""
query_embedding = await get_embedding(query, input_type="query")
if query_embedding is None:
return []
embedding_str = "[" + ",".join(str(v) for v in query_embedding) + "]"
exclude_clause = ""
params: dict[str, Any] = {
"embedding": embedding_str,
"account_id": str(account_id),
"limit": limit,
}
if exclude_tree_id:
exclude_clause = "AND te.tree_id != :exclude_tree_id"
params["exclude_tree_id"] = str(exclude_tree_id)
result = await db.execute(
text(f"""
SELECT
te.tree_id,
t.name as tree_name,
t.tree_type,
te.chunk_text,
te.chunk_type,
te.node_id,
1 - (te.embedding <=> :embedding::vector) as similarity
FROM tree_embeddings te
JOIN trees t ON t.id = te.tree_id
WHERE te.account_id = :account_id
AND t.deleted_at IS NULL
{exclude_clause}
ORDER BY te.embedding <=> :embedding::vector
LIMIT :limit
"""),
params,
)
rows = result.mappings().all()
return [
{
"tree_id": str(row["tree_id"]),
"tree_name": row["tree_name"],
"tree_type": row["tree_type"],
"chunk_text": row["chunk_text"],
"chunk_type": row["chunk_type"],
"node_id": row["node_id"],
"similarity": float(row["similarity"]),
}
for row in rows
]

View File

@@ -0,0 +1,84 @@
"""Chat retention cleanup job.
Runs daily via APScheduler to enforce account-level retention settings:
- Delete non-pinned chats older than chat_retention_days
- Delete oldest non-pinned chats when count exceeds chat_retention_max_count
"""
import logging
from datetime import datetime, timezone, timedelta
from sqlalchemy import select, delete, func
from app.core.database import async_session_maker
from app.models.account import Account
from app.models.assistant_chat import AssistantChat
logger = logging.getLogger(__name__)
async def cleanup_expired_chats() -> None:
"""Enforce chat retention policies for all accounts."""
async with async_session_maker() as db:
try:
result = await db.execute(select(Account))
accounts = result.scalars().all()
total_deleted = 0
for account in accounts:
deleted = await _cleanup_account_chats(account, db)
total_deleted += deleted
await db.commit()
if total_deleted > 0:
logger.info("[retention] Cleaned up %d expired chats", total_deleted)
except Exception as e:
logger.error("[retention] Chat cleanup failed: %s", e)
await db.rollback()
async def _cleanup_account_chats(account: Account, db) -> int:
"""Enforce retention for a single account. Returns count deleted."""
deleted = 0
# Age-based retention
if account.chat_retention_days:
cutoff = datetime.now(timezone.utc) - timedelta(days=account.chat_retention_days)
result = await db.execute(
delete(AssistantChat)
.where(
AssistantChat.account_id == account.id,
AssistantChat.pinned == False, # noqa: E712
AssistantChat.updated_at < cutoff,
)
.returning(AssistantChat.id)
)
deleted += len(result.all())
# Count-based retention
if account.chat_retention_max_count:
total = await db.scalar(
select(func.count(AssistantChat.id)).where(
AssistantChat.account_id == account.id,
)
) or 0
if total > account.chat_retention_max_count:
excess = total - account.chat_retention_max_count
# Get oldest non-pinned chat IDs
oldest = await db.execute(
select(AssistantChat.id)
.where(
AssistantChat.account_id == account.id,
AssistantChat.pinned == False, # noqa: E712
)
.order_by(AssistantChat.updated_at.asc())
.limit(excess)
)
ids_to_delete = [row[0] for row in oldest.all()]
if ids_to_delete:
await db.execute(
delete(AssistantChat).where(AssistantChat.id.in_(ids_to_delete))
)
deleted += len(ids_to_delete)
return deleted

View File

@@ -0,0 +1,165 @@
"""Tree chunker — converts tree_structure JSON into embeddable text chunks.
Produces three chunk types:
- tree_summary: Name + description + tags + type overview
- node: Individual node content with breadcrumb path context
- solution: Full solution/action text with path context
"""
import logging
from typing import Any
logger = logging.getLogger(__name__)
def _get_breadcrumb(node: dict, parent_path: str = "") -> str:
"""Build a breadcrumb path string for a node."""
content = node.get("content", node.get("label", ""))[:80]
if parent_path:
return f"{parent_path} > {content}"
return content
def _chunk_node(
node: dict,
tree_name: str,
tree_type: str,
tags: list[str],
parent_path: str = "",
) -> list[dict[str, Any]]:
"""Recursively chunk a node and its children."""
chunks = []
node_type = node.get("type", "unknown")
node_id = node.get("id", "")
content = node.get("content", node.get("label", ""))
breadcrumb = _get_breadcrumb(node, parent_path)
# Build chunk text based on node type
if node_type in ("question", "decision"):
options = node.get("children", [])
option_labels = [
child.get("label", child.get("content", ""))[:100]
for child in options
if isinstance(child, dict)
]
text_parts = [
f"[{node_type}] {content}",
]
if option_labels:
text_parts.append(f"Options: {', '.join(option_labels)}")
text_parts.append(f"Path: {breadcrumb}")
text_parts.append(f"Flow: {tree_name} | Type: {tree_type}")
if tags:
text_parts.append(f"Tags: {', '.join(tags)}")
chunks.append({
"chunk_type": "node",
"node_type": node_type,
"node_id": node_id,
"chunk_text": "\n".join(text_parts),
})
elif node_type in ("action", "solution", "info", "warning"):
text_parts = [
f"[{node_type}] {content}",
f"Path: {breadcrumb}",
f"Flow: {tree_name} | Type: {tree_type}",
]
if tags:
text_parts.append(f"Tags: {', '.join(tags)}")
chunk_type = "solution" if node_type == "solution" else "node"
chunks.append({
"chunk_type": chunk_type,
"node_type": node_type,
"node_id": node_id,
"chunk_text": "\n".join(text_parts),
})
elif node_type in ("step", "section_header"):
text_parts = [
f"[{node_type}] {content}",
f"Path: {breadcrumb}",
f"Flow: {tree_name} | Type: {tree_type}",
]
if node.get("description"):
text_parts.insert(1, node["description"])
if tags:
text_parts.append(f"Tags: {', '.join(tags)}")
chunks.append({
"chunk_type": "node",
"node_type": node_type,
"node_id": node_id,
"chunk_text": "\n".join(text_parts),
})
# Recurse into children
children = node.get("children", [])
if isinstance(children, list):
for child in children:
if isinstance(child, dict):
chunks.extend(
_chunk_node(child, tree_name, tree_type, tags, breadcrumb)
)
# Follow next_node_id linked nodes (action nodes)
# These are handled at the tree level, not recursively
return chunks
def chunk_tree(
tree_name: str,
tree_type: str,
description: str | None,
tags: list[str],
tree_structure: dict[str, Any],
) -> list[dict[str, Any]]:
"""Convert a tree into embeddable text chunks.
Args:
tree_name: Name of the flow.
tree_type: troubleshooting | procedural | maintenance.
description: Flow description.
tags: List of tag names.
tree_structure: The tree_structure JSONB content.
Returns:
List of chunk dicts with keys: chunk_type, node_type, node_id, chunk_text.
"""
chunks = []
# Tree summary chunk
summary_parts = [
f"Flow: {tree_name}",
f"Type: {tree_type}",
]
if description:
summary_parts.append(f"Description: {description}")
if tags:
summary_parts.append(f"Tags: {', '.join(tags)}")
chunks.append({
"chunk_type": "tree_summary",
"node_type": None,
"node_id": None,
"chunk_text": "\n".join(summary_parts),
})
# Chunk the tree structure nodes
root = tree_structure
if isinstance(root, dict):
# Handle both flat structure and nested
if "children" in root or "type" in root:
chunks.extend(
_chunk_node(root, tree_name, tree_type, tags)
)
# Handle steps array (procedural flows)
if "steps" in root and isinstance(root["steps"], list):
for step in root["steps"]:
if isinstance(step, dict):
chunks.extend(
_chunk_node(step, tree_name, tree_type, tags)
)
return chunks

View File

@@ -35,6 +35,10 @@ httpx>=0.27.0
anthropic>=0.40.0
google-genai>=1.0.0
# RAG / Embeddings
pgvector>=0.3.6
voyageai>=0.3.0
# Utilities
python-dotenv==1.0.1
croniter>=2.0.0