"""Tree embedding storage for RAG-powered AI assistant.

Persists vector embeddings of tree content chunks for semantic search.
Every tree is broken into several chunks (node, solution, tree_summary),
and each chunk is embedded via Voyage AI for cosine similarity retrieval.
"""

import uuid
from datetime import datetime, timezone
from typing import Optional, Any

from sqlalchemy import String, Text, DateTime, ForeignKey, Index
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID, JSONB

from app.core.database import Base

# pgvector is optional in local environments: fall back to ``None`` so that
# importing this module does not fail when the package is not installed.
try:
    from pgvector.sqlalchemy import Vector
except ImportError:
    Vector = None


def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class TreeEmbedding(Base):
    """ORM model for one embedded chunk of a tree.

    Each row belongs to a tree and an account (both foreign keys cascade on
    delete) and carries the chunk's text, its kind, and the name of the
    embedding model.
    """

    __tablename__ = "tree_embeddings"
    __table_args__ = (
        Index("ix_tree_embeddings_account_id", "account_id"),
        Index("ix_tree_embeddings_tree_id", "tree_id"),
    )

    # --- Identity and ownership -------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    tree_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("trees.id", ondelete="CASCADE"),
        nullable=False,
    )
    account_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("accounts.id", ondelete="CASCADE"),
        nullable=False,
    )

    # --- Chunk description ------------------------------------------------
    chunk_type: Mapped[str] = mapped_column(
        String(30),
        comment="node | solution | tree_summary",
        nullable=False,
    )
    # Both node fields are optional (nullable).
    node_type: Mapped[Optional[str]] = mapped_column(String(30), nullable=True)
    node_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
    chunk_text: Mapped[str] = mapped_column(Text, nullable=False)
    embedding_model: Mapped[str] = mapped_column(
        String(50),
        default="voyage-3.5",
        nullable=False,
    )

    # NOTE: the embedding vector itself has no mapped attribute here. The
    # column is created by a migration as ``vector(1024)`` and is handled
    # directly in queries.

    # Free-form JSON metadata; ``dict`` is passed as a callable so every row
    # gets its own fresh empty dict.
    meta: Mapped[dict[str, Any]] = mapped_column(
        JSONB,
        default=dict,
        nullable=False,
    )

    # --- Timestamps (timezone-aware, UTC) ----------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utcnow,
        onupdate=_utcnow,
    )